├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── .nojekyll ├── README.md ├── _sidebar.md ├── assets │ └── pseudoknot.png ├── development │ └── README.md ├── index.html ├── setup │ ├── environment.md │ ├── install.md │ └── packages.md ├── sherlock │ ├── README.md │ ├── environment.md │ └── jobs.md └── usage │ ├── README.md │ ├── pseudoknots.md │ ├── structure_prediction.md │ └── utilities.md ├── example_arnie_file.txt ├── examples ├── data_for_examples │ └── ribologic_SI.txt └── start_here.ipynb ├── notebooks ├── IntroToArnie.ipynb └── README.md ├── parameter_files ├── contrafold.params.complementary ├── contrafold.params.noncomplementary ├── learntofold.contrafold.params ├── rna_andronescu2007.par ├── rna_langdon2018.par ├── rna_turner1999.par └── rna_turner2004.par ├── pyproject.toml ├── pytest.ini ├── scripts ├── score_pseudoacc_mea.py ├── write_bpp_matrices.py └── write_unpaired_vectors.py ├── src └── arnie │ ├── __init__.py │ ├── bpps.py │ ├── free_energy.py │ ├── mea │ ├── __init__.py │ ├── mea.py │ ├── mea_utils.py │ └── threshknot.py │ ├── mfe.py │ ├── mfe_bootstrap.py │ ├── pfunc.py │ ├── pk_predictors.py │ ├── sample_structures.py │ ├── utils.py │ └── viz.py └── tests ├── __init__.py ├── test_bpps.py ├── test_converters.py ├── test_evaluation_metrics.py ├── test_file_readers.py ├── test_files ├── samiv_eternafold.prob ├── seq.bpseq ├── seq.ct └── seq.prob ├── test_helix_getting_and_removing.py ├── test_linearpartition.py ├── test_pfunc.py ├── test_pk.py ├── test_sample_struct.py ├── test_settings.py ├── test_structure_handling.py └── test_vfold_versions.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python package to PyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | name: Build distribution package 8 | runs-on: "ubuntu-latest" 9 | 10 | steps: 11 | - name: Checkout source 12 | uses: actions/checkout@v4 13 | 14 | - name: 
Set up Python 3.12 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.12" 18 | 19 | - name: Install pypa/build 20 | run: python3 -m pip install build --user 21 | 22 | - name: Build a binary wheel and a source tarball 23 | run: python3 -m build 24 | 25 | - name: Store the distribution packages 26 | uses: actions/upload-artifact@v4 27 | with: 28 | name: release-distributions 29 | path: dist/ 30 | 31 | publish-to-pypi: 32 | name: Publish Python distribution to PyPI 33 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 34 | needs: 35 | - build 36 | runs-on: ubuntu-latest 37 | environment: 38 | name: pypi 39 | url: https://pypi.org/p/arnie 40 | permissions: 41 | id-token: write # IMPORTANT: mandatory for trusted publishing 42 | 43 | steps: 44 | - name: Download all the dists 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: release-distributions 48 | path: dist/ 49 | - name: Publish distribution to PyPI 50 | uses: pypa/gh-action-pypi-publish@release/v1 51 | 52 | github-release: 53 | name: >- 54 | Sign the Python distribution with Sigstore 55 | and upload them to GitHub Release 56 | needs: 57 | - publish-to-pypi 58 | runs-on: ubuntu-latest 59 | 60 | permissions: 61 | contents: write # IMPORTANT: mandatory for making GitHub Releases 62 | id-token: write # IMPORTANT: mandatory for sigstore 63 | 64 | steps: 65 | - name: Download all the dists 66 | uses: actions/download-artifact@v4 67 | with: 68 | name: release-distributions 69 | path: dist/ 70 | - name: Sign the dists with Sigstore 71 | uses: sigstore/gh-action-sigstore-python@v1.2.3 72 | with: 73 | inputs: >- 74 | ./dist/*.tar.gz 75 | ./dist/*.whl 76 | - name: Create GitHub Release 77 | env: 78 | GITHUB_TOKEN: ${{ github.token }} 79 | run: >- 80 | gh release create 81 | '${{ github.ref_name }}' 82 | --repo '${{ github.repository }}' 83 | --notes "" 84 | - name: Upload artifact signatures to GitHub Release 85 | env: 86 | GITHUB_TOKEN: ${{ github.token }} 87 | # 
Upload to GitHub Release using the `gh` CLI. 88 | # `dist/` contains the built packages, and the 89 | # sigstore-produced signatures and certificates. 90 | run: >- 91 | gh release upload 92 | '${{ github.ref_name }}' dist/** 93 | --repo '${{ github.repository }}' 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__/* 3 | .ipynb_checkpoints/* 4 | */.ipynb_checkpoints/* 5 | rna.ps 6 | *.arnie 7 | dist/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Leland Stanford Junior University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # arnie 2 | Python API to compute RNA energetics and do structure prediction across multiple secondary structure packages. 3 | 4 | ## Documentation 5 | [See our full docs.](https://daslab.github.io/arnie) 6 | 7 | ## Install 8 | `arnie` is [available on PyPI](https://pypi.org/project/arnie/). 9 | 10 | `pip install arnie` 11 | 12 | ## Repo Organization 13 | 14 | `src/arnie`: source code for the arnie package. 15 | 16 | `docs`: docsify-based markdown documentation for the arnie package. 17 | 18 | `tests`: unit tests 19 | 20 | `notebooks`: example jupyter notebooks with usage. 21 | 22 | `scripts`: scripts for processing sequences in batch. 23 | 24 | `parameter_files`: dir of various parameter files for packages, put here out of convenience. 25 | 26 | 27 | (c) 2024 Leland Stanford Jr University 28 | Authors: 29 | Hannah Wayment-Steele -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/.nojekyll -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # arnie 2 | Arnie is a Python API to compute RNA energetics and do structure prediction across multiple secondary structure packages. 3 | 4 | ## Install 5 | `arnie` is [available on PyPI](https://pypi.org/project/arnie/). 6 | 7 | `pip install arnie` 8 | 9 | ## Simple Setup 10 | Arnie works by delegating calls for structure predictions to various RNA prediction libraries. 
To use arnie we need to have these libraries installed, and we need to point to their installed locations with environment variables. Here we will use [Eternafold](https://github.com/eternagame/Eternafold) which is simple to install via [Bioconda](https://bioconda.github.io/recipes/eternafold/README.html). This example assumes you have conda installed already; see the full [setup page](/setup/environment.md) for more details about setting up an arnie environment. 11 | 12 | 13 | ``` 14 | conda install -c bioconda eternafold 15 | export eternafold_PATH=/path/to/installed/location 16 | ``` 17 | 18 | ## Usage: 19 | 20 | See the [usage docs](/usage/structure_prediction) for example syntax. In brief, comparing across packages is simple. For computing base pairing probability matrices: 21 | 22 | ``` 23 | from arnie.bpps import bpps 24 | 25 | bpps_dict = {} 26 | my_sequence = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG' 27 | 28 | for pkg in ['vienna','nupack','RNAstructure','contrafold','RNAsoft']: 29 | bpps_dict[pkg] = bpps(my_sequence, package=pkg) 30 | ``` 31 | 32 | (c) 2024 [Das Lab](https://daslab.stanford.edu/), Leland Stanford Jr University -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Home](README.md) 4 | - Getting Started 5 | - [Installation](setup/install.md) 6 | - [Environment](setup/environment.md) 7 | - Using Arnie 8 | - [Basics](usage/README.md) 9 | - [Structure Prediction](usage/structure_prediction.md) 10 | - [Pseudoknot Prediction](usage/pseudoknots.md) 11 | - Arnie on Sherlock 12 | - [Environment](sherlock/environment.md) 13 | - [Jobs](sherlock/jobs.md) 14 | - [Contributing](development/README.md) -------------------------------------------------------------------------------- /docs/assets/pseudoknot.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/assets/pseudoknot.png -------------------------------------------------------------------------------- /docs/development/README.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Installing via Github 4 | The `arnie` package source code is hosted on [Github](https://github.com/DasLab/arnie). You can clone the repo as below. 5 | 6 | ``` 7 | git clone https://github.com/DasLab/arnie.git 8 | ``` 9 | 10 | You can also use pip to install arnie from our Github repo: 11 | ``` 12 | pip install git+https://github.com/DasLab/arnie 13 | ``` 14 | This is particularly useful for testing new features internally before releasing on PyPI. 15 | 16 | ## Repo Organization 17 | 18 | `src/arnie`: source code for the arnie package. 19 | 20 | `docs`: docsify-based markdown documentation for the arnie package. 21 | 22 | `tests`: unit tests 23 | 24 | `notebooks`: example jupyter notebooks with usage. 25 | 26 | `scripts`: scripts for processing sequences in batch. 27 | 28 | `parameter_files`: dir of various parameter files for packages, put here out of convenience. 29 | 30 | 31 | 32 | ## Github Issues 33 | We use [Github issues](https://github.com/DasLab/arnie/issues) to coordinate development tasks and track feature development and bug fixes. If you run into problems while using `arnie`, please file an issue so that we can address the bug. Similarly, if you have a feature idea that could simplify your research, file an issue detailing your proposed feature. 34 | 35 | ## Package Testing 36 | Tests are located in the `tests` directory of the repo. We use the [pytest](https://docs.pytest.org/en/stable/) testing framework. Tests are run in the repo root directory. 
37 | 38 | To run all the tests, 39 | ``` 40 | pytest 41 | ``` 42 | To run a specific test, 43 | ``` 44 | pytest tests/test_structure_handling.py 45 | ``` 46 | If you add new features or fix a bug, make sure to update the tests appropriately. 47 | 48 | ## Package Distribution 49 | We distribute arnie via the [Python Package Index](https://pypi.org/). The DasLab has a [PyPI account](https://pypi.org/user/daslab/) for all our packages, with `arnie` available [here](https://pypi.org/project/arnie/) 50 | 51 | Arnie package release is automated via Github Actions. The [release workflow](https://github.com/DasLab/arnie/actions/workflows/release.yml) builds the package for distribution, publishes to PyPI and releases a Github release. The action is triggered on new git tag push. 52 | 53 | To push a new release, update the `pyproject.toml` version number as appropriate (we follow the [semantic versioning](https://semver.org/) standard). Next, define a matching git tag for the version number, and then push to Github. 54 | ``` 55 | git checkout master 56 | git tag -a v1.1.0 -m "Arnie Release v1.1.0" 57 | git push origin tag v1.1.0 58 | ``` -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Document 6 | 7 | 8 | 9 | 13 | 18 | 24 | 30 | 36 | 45 | 46 | 47 |
48 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /docs/setup/environment.md: -------------------------------------------------------------------------------- 1 | # Setting up an arnie environment 2 | 3 | `arnie` is a Python package to simplify interacting with various RNA prediction and analysis libraries. To work, the `arnie` package needs to know the location of those libraries on the local filesystem. `arnie` uses environment variables to point to package locations. 4 | 5 | ## Environment Variables 6 | Here we assume you've already installed a package you want arnie to use (visit the [supported packages page](/setup/packages.md) for more details about specific package installation requirements). Arnie expects environment variables in the form of "{package_name}_PATH". So for `contrafold`, we specify its installed location for arnie with `export contrafold_PATH=/path/to/executable/`. Certain packages require additional resources for arnie to operate. For example, `SpotRNA` also requires a pointer to the conda environment it is installed with. The [supported packages page](/setup/packages.md) details each package's expected environment variables. 7 | 8 | Arnie also expects an `arnie_TMP` environment variable to define where arnie should write temporary files to. Some predictor packages write to files to generate their output; arnie uses the `arnie_TMP` location to support these packages. 9 | 10 | ## Arnie File 11 | As a fallback, you can also specify an "arnie_file.txt" that defines these paths. There is an example arnie_file.txt included in the arnie repo that demonstrates the expected syntax. 
If using the arnie_file approach, you need to set an `ARNIEFILE` environment variable pointing to your arnie_file.txt (e.g., `export ARNIEFILE="/path/to/arnie/"`). 12 | 13 | ## Conda Environments 14 | We recommend using [conda](https://anaconda.org/anaconda/conda) to set up private Python execution environments for your arnie operations. Conda simplifies the sometimes complicated process of managing Python dependencies by creating virtual environments that isolate installed packages. Conda also supports simplified distribution of a wide range of scientific Python libraries, and even a number of RNA structure packages. We recommend the following setup for your RNA science conda environment. 15 | ``` 16 | conda config --add channels bioconda 17 | conda config --add channels conda-forge 18 | conda config --set channel_priority strict 19 | ``` 20 | [Bioconda](https://bioconda.github.io/) and [Conda-Forge](https://conda-forge.org/) are distribution channels for conda packages. Bioconda, for instance, hosts `ViennaRNA` and `Eternafold` RNA packages. 21 | 22 | We set up an example conda environment to support our arnie work below. First we create the environment. Next we activate the environment, which sets up our isolated Python execution environment. After activation, we pip install arnie (which will be installed into the isolated environment with proper PYTHONPATH handling). 23 | ``` 24 | conda create -n rna-env 25 | conda activate rna-env 26 | pip install arnie 27 | ``` -------------------------------------------------------------------------------- /docs/setup/install.md: -------------------------------------------------------------------------------- 1 | # Installing Arnie 2 | We recommend installing `arnie` as a package from [PyPI](https://pypi.org/project/arnie/) via pip.
3 | ``` 4 | pip install arnie 5 | ``` 6 | 7 | You can also use pip to install arnie directly from our Github repo: 8 | ``` 9 | pip install git+https://github.com/DasLab/arnie 10 | ``` 11 | 12 | ## Installing via Github 13 | Alternatively, the `arnie` package source code is hosted on [Github](https://github.com/DasLab/arnie). You can install via source code as below. 14 | 15 | ``` 16 | git clone https://github.com/DasLab/arnie.git 17 | ``` 18 | 19 | Note that if you install via the Github Repo, you will need to add the path to arnie to your PYTHONPATH (for example, in your .bashrc as `export PYTHONPATH=$PYTHONPATH:/path/to/arnie`). -------------------------------------------------------------------------------- /docs/setup/packages.md: -------------------------------------------------------------------------------- 1 | # Supported Packages 2 | 3 | ## Eternafold 4 | 5 | ## Contrafold 6 | 7 | ## Vienna 8 | 9 | ## NuPACK 10 | 11 | ## RNAstructure 12 | 13 | ## RNAsoft 14 | 15 | -------------------------------------------------------------------------------- /docs/sherlock/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/sherlock/README.md -------------------------------------------------------------------------------- /docs/sherlock/environment.md: -------------------------------------------------------------------------------- 1 | # Sherlock 2 | 3 | [Sherlock](https://www.sherlock.stanford.edu/), Stanford's high-performance computing cluster, is a useful resource for compute-intensive arnie tasks. 4 | 5 | If you've never worked with cluster computing before, there are some differences in how things work from your laptop. 
The [Sherlock docs](https://www.sherlock.stanford.edu/docs/) are a great place to start, and the Sherlock team even does [onboarding sessions and office hours](https://www.sherlock.stanford.edu/docs/#onboarding-sessions) to help new users. 6 | 7 | ## Storage on Sherlock 8 | The first thing to understand when setting up your Sherlock environment is where to store data. Sherlock offers several [data storage systems](https://www.sherlock.stanford.edu/docs/storage/) tailored for specific needs. We recommend using them as follows: 9 | - `$HOME`: storage for your rna-environment, miniconda install, source code, etc 10 | - `$GROUPHOME`: storage for shared resources or projects that other lab members may access 11 | - `$SCRATCH/$GROUP_SCRATCH`: high-performance storage for large datasets and temporary files (WARNING: files on SCRATCH and GROUP_SCRATCH are automatically purged 12 | 90 days after their last content modification; make sure you back up data there before this window) 13 | - `$LSCRATCH`: node-local SSD; useful for specific jobs where high IOPS are important or when performing large batch jobs that may impact group resources 14 | - `$OAK`: long-term storage of large research datasets 15 | 16 | ## Installing software on Sherlock 17 | Sherlock provides specific scientific computing software pre-installed on the Sherlock system via ["modules"](https://www.sherlock.stanford.edu/docs/software/modules/). These modules are selected and maintained to provide maximum compatibility and reduce dependency conflicts. You can search for available modules [here](https://www.sherlock.stanford.edu/docs/software/list/). 
If you want to use Sherlock's module system instead of setting up your own Python environment, we recommend the following modules to load for arnie (and various downstream prediction algorithms): 18 | ``` 19 | module load python/3.6.1 20 | module load py-numpy/1.18.1_py36 21 | module load py-pandas/1.0.3_py36 22 | module load py-scipy/1.4.1_py36 23 | module load gcc 24 | module load glpk 25 | module load mpfr 26 | ``` 27 | 28 | ## Setting up your environment on Sherlock 29 | Setting up your environment on Sherlock is fairly straightforward. First, set up a folder for yourself in $GROUPHOME (`mkdir $GROUPHOME/{your_name}`) to store shared resources. Next, we'll install [miniconda](https://docs.anaconda.com/miniconda/) for Python environment management and package installation. After installing miniconda, we can configure conda with useful package channels, create and activate an rna-env environment to store our packages, and install arnie. We provide an environment.yaml file at `$GROUPHOME/rna-env/rna-environment.yaml` to create a standard environment with some standard packages. 30 | ``` 31 | conda config --add channels bioconda 32 | conda config --add channels conda-forge 33 | conda config --set channel_priority strict 34 | conda create -n rna-env -f $GROUPHOME/rna-env/rna-environment.yaml 35 | conda activate rna-env 36 | pip install arnie 37 | ``` 38 | 39 | Your new conda environment has arnie and a few predictors installed. However, many prediction libraries are not available via conda or pip and usually require installing from source. The lab maintains a directory of predictors on Sherlock that you should copy to your $HOME directory. 40 | ``` 41 | cd $HOME 42 | git clone $GROUPHOME/rna-env 43 | ``` 44 | Predictors are stored under `rna-env/predictors`. If you add new predictors in the course of your work, make sure to push your updates back to the $GROUPHOME origin repo.
45 | 46 | Now that your environment is set up, let's take a look at [using Sherlock for compute jobs with arnie](jobs.md). -------------------------------------------------------------------------------- /docs/sherlock/jobs.md: -------------------------------------------------------------------------------- 1 | # Jobs on Sherlock 2 | 3 | [Running jobs](https://www.sherlock.stanford.edu/docs/user-guide/running-jobs/) on Sherlock is a little different. In order to fairly distribute cluster resources, Sherlock uses a scheduler called SLURM. Users define what work they want done in job files and submit them to the scheduler, which allocates the requested compute resources when it can. When your job is allocated resources, compute nodes with the requested resources will run your job automatically. 4 | 5 | ## Batch Jobs 6 | > A job is simply an instance of your program, for example your R, Python or Matlab script that is submitted to and executed by the scheduler (Slurm). When you submit a job with the `sbatch` command it's called a batch job and it will either run immediately or will pend (wait) in the queue. 7 | 8 | The [Sherlock jobs docs](https://www.sherlock.stanford.edu/docs/user-guide/running-jobs/#batch-jobs) are fairly comprehensive and will provide more detail than we can here. We will provide a few example batch scripts to demonstrate some standard uses. 9 | 10 | ## Interactive jobs 11 | It can be helpful when initially creating a job to work on it interactively on a compute node like the ones that will run your job. You can request a compute node via the command `sh_dev`. By default, sh_dev allocates one core and 4 GB of memory on one node for one hour. See [the docs](https://www.sherlock.stanford.edu/docs/user-guide/running-jobs/#interactive-jobs) for more details about requesting an interactive node. 
12 | 13 | ## Interactive applications 14 | Sherlock provides several [interactive applications](https://www.sherlock.stanford.edu/docs/user-guide/ondemand/?h=jupyter#interactive-applications) if you need to run GUI based interactive software. When working with arnie you will most likely use [JupyterNotebooks](https://www.sherlock.stanford.edu/docs/user-guide/ondemand/?h=jupyter#jupyter-notebooks) or [JupyterLab](https://www.sherlock.stanford.edu/docs/user-guide/ondemand/?h=jupyter#jupyterlab) to interactively explore your research questions. 15 | 16 | ## Important Notes 17 | We have run into some common issues using Sherlock over the years. Here's a non-comprehensive list of things to watch out for while using Sherlock. 18 | 19 | - **Permissions errors**: 20 | 21 | If you plan on working on projects with group members and share files in `$GROUPHOME`, remember to set the permissions of files you create to be group accessible. By default, files will be read-only for group members. `chmod -R 770 /path/to/file` will allow group members to read, write, and execute shared files in `$GROUPHOME`. 22 | 23 | - **Large array jobs impacting $GROUPHOME**: 24 | 25 | Be careful about accessing resources in `$GROUPHOME` when running large array jobs. Thousands of the same job accessing the same files on `$GROUPHOME` can slow down file access for other lab members. In many cases, your code may not be the one accessing files in `$GROUPHOME`, but a predictor you're using might (`spotrna` causes this issue often). The best solution is to copy the files you're accessing to your `$SCRATCH` folder and access them there. If you have large array jobs requesting thousands of nodes, you may want to copy the files to the node's `$LSCRATCH` instead. See the array job example sbatch file for more details. 
26 | 27 | 28 | -------------------------------------------------------------------------------- /docs/usage/README.md: -------------------------------------------------------------------------------- 1 | # Using Arnie 2 | Arnie's primary purpose is to simplify the process of making structure predictions for an RNA sequence with a variety of structure prediction libraries. 3 | 4 | ## RNA structure 5 | RNA molecules form complex three-dimensional shapes in nature. We represent these forms in three structure levels of increasing complexity. 6 | 7 | 1. **Primary Structure** 8 | The primary structure of an RNA molecule is the base identity of the various nucleotides that make up the molecule. This sequence string is typically written in the 5' to 3' direction. 9 | Example: "AGUAUCAAAAAAGAUAC" 10 | 11 | 2. **Secondary Structure** 12 | The secondary structure of an RNA molecule is the set of base pairing interactions between nucleotides in an RNA molecule. There are multiple ways to computationally represent secondary structure, although arnie primarily uses two: the base pairing matrix and the dot bracket string. 13 | 14 | A ***base pairing matrix*** is an NxN matrix (where N is the length of the RNA sequence), with the value of the `i,j` position representing the probability of the `i` nucleotide pairing with the `j` nucleotide. 15 | 16 | A ***dot bracket string*** is a representation of secondary structure where `(` and `)` characters represent base pairs and `.` characters represent unpaired bases. For example, `((....))` in dot bracket notation indicates that the 1st nucleotide is paired with the 8th nucleotide, the 2nd nucleotide is paired with the 7th nucleotide, and the others are unpaired. More complex secondary structures can also be represented in dot bracket notation (see [Pseudoknots](usage/pseudoknots.md) for more details). 17 | 18 | Arnie provides [several methods to predict secondary structures](usage/structure_prediction.md). 19 | 20 | 3. 
**Tertiary Structure** 21 | The tertiary structure is the three-dimensional structure of the RNA molecule, with each atom located in a 3D coordinate space. Arnie doesn't work with this level of structure. 22 | 23 | ## Examples 24 | The easiest way to get started with arnie is trying out our example notebooks to explore the functionality arnie provides. 25 | 26 | - [Basic Introduction / Install](https://github.com/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb) 27 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb) 28 | -------------------------------------------------------------------------------- /docs/usage/pseudoknots.md: -------------------------------------------------------------------------------- 1 | # Pseudoknots 2 | 3 | [Pseudoknots](https://en.wikipedia.org/wiki/Pseudoknot) are a more complex form of secondary structure. 4 | 5 | example of a pseudoknot 6 | 7 | Unpaired bases in a loop structure may pair with nucleotides elsewhere in the RNA sequence. This type of pairing is impossible to represent with the `(`, `)`, and `.` characters in traditional dot bracket notation, so we introduce new characters to represent various levels of pseudoknot pairings. In order, Arnie uses `[`, `{`, `<`, and lower case alphabet characters (`abc...`) to represent opening pairs, and `]`, `}`, `>`, and upper case alphabet characters (`ABC...`) to represent closing pairs. 8 | 9 | Here is an example pseudoknotted structure in dot bracket notation utilizing the expanded character set `...(((..[[[.(((...))))))]]]...`. 10 | 11 | Many traditional structure prediction algorithms struggle with predicting pseudoknot structures, but there are a variety of approaches that can predict these complex folds. Arnie provides two main functions to predict pseudoknots: `pk_predict` and `pk_predict_from_bpp`.
12 | 13 | ## pk_predict 14 | `pk_predict` takes an input RNA sequence string and returns a predicted secondary structure string in dot bracket notation that may include pseudoknots. It's very similar to the `mfe` function, but supports a different set of predictor packages that focus on pseudoknot prediction. 15 | 16 | **Args:** 17 | ``` 18 | seq (str): nucleic acid sequence, required 19 | predictor (str): the folding library to use 20 | ipknot options: 21 | model: one of ["LinearPartition-C","LinearPartition-V","Boltzmann","ViennaRNA","CONTRAfold","NUPACK"] 22 | t1: probability threshold level 1 23 | t2: probability threshold level 2 24 | refinement: number of times for refinement 25 | 26 | hotknots options: 27 | model: one of ["CC","RE","DP"] 28 | param: one of ["parameters_CC06.txt","parameters_CC09.txt","parameters_DP03.txt","parameters_DP09.txt"] 29 | 30 | spotrna options: 31 | cpu: number of cpu threads 32 | ``` 33 | 34 | **Returns:** 35 | ``` 36 | A string in dot bracket notation representing the predicted secondary structure of the provided sequence, potentially including pseudoknots. 37 | ``` 38 | 39 | **Example:** 40 | ``` 41 | pk_predict("GUAUCAAAAAAGAUACGCCGUAUGCUAAUAUGUAUCUAUACUUGCUCUACAGGUUGAG", "knotty") 42 | 43 | '..........(((((([[[[[[.[[...[[[))))))]]]...]]..]]].]]]....' 44 | ``` 45 | 46 | **Supported packages:** 47 | - `hotknots` 48 | - `ipknot` 49 | - `knotty` 50 | - `spotrna` 51 | - `spotrna2` 52 | - `e2efold` 53 | - `pknots` 54 | - `nupack` 55 | 56 | ## pk_predict_from_bpp 57 | `pk_predict_from_bpp` takes a different approach to pseudoknot prediction. Rather than use dedicated pseudoknot prediction packages, `pk_predict_from_bpp` uses post-processing algorithms that can predict likely pseudoknots based on a sequence's predicted base pair probability matrix. This allows us to examine sequences for predicted pseudoknots with traditional predictive models that don't support pseudoknots by default.
58 | 59 | `pk_predict_from_bpp` provides two processing algorithms, [`threshknot`](https://arxiv.org/abs/1912.12796) and [`hungarian`](https://en.wikipedia.org/wiki/Hungarian_algorithm). 60 | 61 | **Args:** 62 | ``` 63 | bpp (array): base pair probability matrix, required 64 | heuristic (str): the pk prediction algorithm to use; either "hungarian" or "threshknot" 65 | threshknot options: 66 | theta 67 | max_iter 68 | allowed_buldge_len 69 | min_len_helix 70 | 71 | hungarian options: 72 | add_p_unpaired 73 | theta (aka prob_to_0_threshold_post) 74 | prob_to_0_threshold_prior 75 | prob_to_1_threshold_prior 76 | exp 77 | sigmoid_slope_factor 78 | ln 79 | allowed_buldge_len 80 | min_len_helix 81 | ``` 82 | 83 | **Returns:** 84 | ``` 85 | A string in dot bracket notation representing the predicted secondary structure of the provided sequence, potentially including pseudoknots. 86 | ``` 87 | 88 | **Example:** 89 | ``` 90 | bpps = bpps("GUAUCAAAAAAGAUACGCCGUAUGCUAAUAUGUAGGCGCUAUACUUGCUCUACACCGGCGGUUGAG", package="eternafold") 91 | pk_predict_from_bpp(bpps) 92 | 93 | '(((((......)))))..........................................' 94 | ``` 95 | 96 | **Supported packages:** 97 | - `eternafold` 98 | - `contrafold` 99 | - `vienna` 100 | - `nupack` 101 | - `rnasoft` 102 | - `rnastructure` 103 | - `vfold` 104 | 105 | -------------------------------------------------------------------------------- /docs/usage/structure_prediction.md: -------------------------------------------------------------------------------- 1 | 2 | ## Structure Prediction 3 | 4 | ## MFE 5 | The `mfe` function generates a "minimum free energy" structure prediction with the selected package. The minimum free energy prediction is the secondary structure calculated to have the lowest free energy value. In theory, the lower the free energy, the more likely the structure is to form. Not all predictors support free energy-based estimates (although many do). 6 | 7 | Note: `mfe` operates differently than [`mea`](#mea).
That said, contrafold's default structure prediction is an MEA structure, not an MFE structure. When `mfe` is called with contrafold, it returns this default MEA structure unless the `viterbi` argument is set (contrafold's `--viterbi` flag), in which case contrafold's Viterbi (MFE) algorithm is used. 8 | 9 | 10 | **Args:** 11 | ``` 12 | seq (str): nucleic acid sequence, required 13 | package (str): the folding library to use 14 | T (float): temperature (Celsius) 15 | constraint (str): structure constraints 16 | motif (str): argument to vienna motif 17 | linear (bool): call LinearFold to estimate MFE in Vienna or Contrafold 18 | return_dG_MFE (bool): also return dG(MFE) (specific to linearfold) 19 | dangles (bool): dangles or not (specific to linearfold) 20 | noncanonical (bool): include noncanonical pairs or not (specific to contrafold, RNAstructure (Cyclefold)) 21 | param_file (str): path to specific thermodynamic parameter file (specific to contrafold, eternafold) 22 | coaxial (bool): coaxial stacking or not (specific to rnastructure) 23 | viterbi (bool): use the viterbi algorithm for mfe calculation (specific to contrafold) 24 | pseudo (bool): if True, will predict pseudoknots 25 | shape_signal (list): list of normalized SHAPE reactivities, with negative values indicating no signal (specific to rnastructure) 26 | dms_signal (list): list of normalized DMS reactivities, with negative values indicating no signal (specific to rnastructure) 27 | shape_file (str): path to file containing shape_signal (specific to rnastructure) 28 | dms_file (str): path to file containing dms_signal (specific to rnastructure) 29 | ``` 30 | 31 | **Returns:** 32 | ``` 33 | A string in dot bracket notation representing the calculated MFE structure of the provided sequence.
34 | ``` 35 | 36 | **Example:** 37 | ``` 38 | mfe("GUAUCAAAAAAGAUAC") 39 | '(((((......)))))' 40 | ``` 41 | 42 | **Supported packages:** 43 | - `eternafold` 44 | - `contrafold` 45 | - `vienna` 46 | - `rnastructure` 47 | - `linearfold` 48 | 49 | ## BPPS 50 | The `bpps` function calculates the "base pairing probability matrix" with the selected package. The base pairing probability matrix is an NxN matrix (where N is the length of the RNA sequence), with the value of the `i,j` position representing the probability of the `i`-th nucleotide pairing with the `j`-th nucleotide. 51 | 52 | **Args:** 53 | ``` 54 | sequence (str): nucleic acid sequence, required 55 | package (str): the folding library to use 56 | constraint (str): structure constraint [vienna, contrafold, rnastructure] 57 | linear (bool): call LinearPartition to estimate Z in Vienna or Contrafold 58 | 59 | motif (str): argument to vienna motif 60 | pseudo (bool): (NUPACK only) include pseudoknot calculation 61 | dangles (bool): dangles or not, specifiable for vienna, nupack 62 | dna (bool): (NUPACK only) use SantaLucia 1998 parameters for DNA 63 | coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold 64 | noncanonical (bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold)) 65 | beam size (int): Beam size for LinearPartition base pair calculation. 66 | DEBUG (bool): Output command-line calls to packages.
67 | threshknot (bool): calls threshknot to predict pseudoknots (for contrafold with LinearPartition) 68 | shape_signal (list): list of normalized SHAPE reactivities, with negative values indicating no signal (specific to rnastructure) 69 | dms_signal (list): list of normalized DMS reactivities, with negative values indicating no signal (specific to rnastructure) 70 | shape_file (str): path to file containing shape_signal (specific to rnastructure) 71 | dms_file (str): path to file containing dms_signal (specific to rnastructure) 72 | ``` 73 | 74 | **Returns:** 75 | ``` 76 | array: NxN matrix of base pair probabilities 77 | ``` 78 | 79 | **Example:** 80 | ``` 81 | bpps("GUAUCAAAAAAGAUAC") 82 | array([[0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 3.77178e-04, 83 | 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 84 | 0.00000e+00, 0.00000e+00, 0.00000e+00, 4.39771e-04, 0.00000e+00, 85 | 8.24776e-01], 86 | [0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 87 | 0.00000e+00, 1.69534e-04, 2.01963e-04, 1.93469e-04, 2.05658e-04, 88 | 2.01099e-04, 1.37709e-04, 5.21924e-04, 0.00000e+00, 8.42528e-01, 89 | 0.00000e+00], 90 | ... 91 | ``` 92 | 93 | **Supported packages:** 94 | - `eternafold` 95 | - `contrafold` 96 | - `vienna` 97 | - `nupack` 98 | - `rnasoft` 99 | - `rnastructure` 100 | - `vfold` -------------------------------------------------------------------------------- /docs/usage/utilities.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/usage/utilities.md -------------------------------------------------------------------------------- /example_arnie_file.txt: -------------------------------------------------------------------------------- 1 | # paths to local installations of packages. If package is not installed, leave as None 2 | # Replace paths below with paths to your installations. 
3 | # NB: .gitignore file ignores *.arnie files. Name it as such if you don't want your local path file 4 | # included with your git repo. 5 | 6 | rnastructure: /path/to/RNAstructure/exe 7 | rnasoft: /path/to/MultiRNAFold 8 | contrafold_2: /path/to/contrafold-se/src 9 | eternafold: /path/to/eternafold/src/ 10 | vfold: /path/to/Vfold2D 11 | nupack: /path/to/nupack3.2.2/build/bin 12 | 13 | # for a Mac installed binary: 14 | vienna_2: /usr/local/bin 15 | # for path to a vienna build: 16 | vienna_2: /path/to/ViennaRNA-2.4.10/src/bin 17 | vienna_1: /path/to/ViennaRNA-1.8.5/bin 18 | 19 | # for linear partition 20 | linearfold: /path/to/LinearFold/bin 21 | linearpartition: /path/to/LinearPartition/bin 22 | 23 | # for PK predictors 24 | hotknots: /path/to/HotKnots_v2.0/bin 25 | ipknot: /path/to/ipknot/build 26 | knotty: /path/to/Knotty 27 | pknots: /path/to/PKNOTS/bin 28 | spotrna: /path/to/SPOT-RNA 29 | spotrna_conda_env: /path/to/miniconda3/envs/spotrna/bin 30 | spotrna2: /path/to/SPOT-RNA2 31 | e2efold: /path/to/e2efold/e2efold_productive 32 | e2efold_conda_env: /path/to/miniconda3/envs/e2efold/bin 33 | 34 | #TMP: location for tmp files for packages. Update to where you want your tmp files stored. 35 | TMP: /tmp 36 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Notebooks 2 | This directory houses various notebooks demonstrating key Arnie functionality. 
3 | 4 | - [Basic Introduction / Install](https://github.com/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb) 5 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb) -------------------------------------------------------------------------------- /parameter_files/contrafold.params.complementary: -------------------------------------------------------------------------------- 1 | base_pair_AA 0 2 | base_pair_AC 0 3 | base_pair_AG 0 4 | base_pair_AU 0.59791199 5 | base_pair_CC 0 6 | base_pair_CG 1.544290641 7 | base_pair_CU 0 8 | base_pair_GG 0 9 | base_pair_GU -0.01304754992 10 | base_pair_UU 0 11 | terminal_mismatch_AAAA 0 12 | terminal_mismatch_AAAC 0 13 | terminal_mismatch_AAAG 0 14 | terminal_mismatch_AAAU 0 15 | terminal_mismatch_AACA 0 16 | terminal_mismatch_AACC 0 17 | terminal_mismatch_AACG 0 18 | terminal_mismatch_AACU 0 19 | terminal_mismatch_AAGA 0 20 | terminal_mismatch_AAGC 0 21 | terminal_mismatch_AAGG 0 22 | terminal_mismatch_AAGU 0 23 | terminal_mismatch_AAUA 0 24 | terminal_mismatch_AAUC 0 25 | terminal_mismatch_AAUG 0 26 | terminal_mismatch_AAUU 0 27 | terminal_mismatch_ACAA 0 28 | terminal_mismatch_ACAC 0 29 | terminal_mismatch_ACAG 0 30 | terminal_mismatch_ACAU 0 31 | terminal_mismatch_ACCA 0 32 | terminal_mismatch_ACCC 0 33 | terminal_mismatch_ACCG 0 34 | terminal_mismatch_ACCU 0 35 | terminal_mismatch_ACGA 0 36 | terminal_mismatch_ACGC 0 37 | terminal_mismatch_ACGG 0 38 | terminal_mismatch_ACGU 0 39 | terminal_mismatch_ACUA 0 40 | terminal_mismatch_ACUC 0 41 | terminal_mismatch_ACUG 0 42 | terminal_mismatch_ACUU 0 43 | terminal_mismatch_AGAA 0 44 | terminal_mismatch_AGAC 0 45 | terminal_mismatch_AGAG 0 46 | terminal_mismatch_AGAU 0 47 | terminal_mismatch_AGCA 0 48 | terminal_mismatch_AGCC 0 49 | terminal_mismatch_AGCG 0 50 | terminal_mismatch_AGCU 0 51 | terminal_mismatch_AGGA 0 52 | terminal_mismatch_AGGC 0 53 | 
terminal_mismatch_AGGG 0 54 | terminal_mismatch_AGGU 0 55 | terminal_mismatch_AGUA 0 56 | terminal_mismatch_AGUC 0 57 | terminal_mismatch_AGUG 0 58 | terminal_mismatch_AGUU 0 59 | terminal_mismatch_AUAA -0.184546064 60 | terminal_mismatch_AUAC -0.1181844187 61 | terminal_mismatch_AUAG -0.4461469607 62 | terminal_mismatch_AUAU -0.6175254495 63 | terminal_mismatch_AUCA 0.004788458708 64 | terminal_mismatch_AUCC 0.08319395146 65 | terminal_mismatch_AUCG -0.2249479995 66 | terminal_mismatch_AUCU -0.3981327204 67 | terminal_mismatch_AUGA 0.5191110288 68 | terminal_mismatch_AUGC -0.3524119307 69 | terminal_mismatch_AUGG -0.4056429433 70 | terminal_mismatch_AUGU -0.7733932162 71 | terminal_mismatch_AUUA -0.01574403519 72 | terminal_mismatch_AUUC 0.268570042 73 | terminal_mismatch_AUUG -0.0934388741 74 | terminal_mismatch_AUUU 0.3373711531 75 | terminal_mismatch_CAAA 0 76 | terminal_mismatch_CAAC 0 77 | terminal_mismatch_CAAG 0 78 | terminal_mismatch_CAAU 0 79 | terminal_mismatch_CACA 0 80 | terminal_mismatch_CACC 0 81 | terminal_mismatch_CACG 0 82 | terminal_mismatch_CACU 0 83 | terminal_mismatch_CAGA 0 84 | terminal_mismatch_CAGC 0 85 | terminal_mismatch_CAGG 0 86 | terminal_mismatch_CAGU 0 87 | terminal_mismatch_CAUA 0 88 | terminal_mismatch_CAUC 0 89 | terminal_mismatch_CAUG 0 90 | terminal_mismatch_CAUU 0 91 | terminal_mismatch_CCAA 0 92 | terminal_mismatch_CCAC 0 93 | terminal_mismatch_CCAG 0 94 | terminal_mismatch_CCAU 0 95 | terminal_mismatch_CCCA 0 96 | terminal_mismatch_CCCC 0 97 | terminal_mismatch_CCCG 0 98 | terminal_mismatch_CCCU 0 99 | terminal_mismatch_CCGA 0 100 | terminal_mismatch_CCGC 0 101 | terminal_mismatch_CCGG 0 102 | terminal_mismatch_CCGU 0 103 | terminal_mismatch_CCUA 0 104 | terminal_mismatch_CCUC 0 105 | terminal_mismatch_CCUG 0 106 | terminal_mismatch_CCUU 0 107 | terminal_mismatch_CGAA 0.08386423535 108 | terminal_mismatch_CGAC -0.2520716816 109 | terminal_mismatch_CGAG -0.6711841881 110 | terminal_mismatch_CGAU -0.3816350028 111 | 
terminal_mismatch_CGCA 0.1117852189 112 | terminal_mismatch_CGCC -0.1704393624 113 | terminal_mismatch_CGCG -0.2179987732 114 | terminal_mismatch_CGCU -0.459267635 115 | terminal_mismatch_CGGA 0.8520640313 116 | terminal_mismatch_CGGC -0.9332488517 117 | terminal_mismatch_CGGG -0.3289551692 118 | terminal_mismatch_CGGU -0.7778822056 119 | terminal_mismatch_CGUA -0.2422339958 120 | terminal_mismatch_CGUC -0.03780509247 121 | terminal_mismatch_CGUG -0.4322334143 122 | terminal_mismatch_CGUU -0.2419976114 123 | terminal_mismatch_CUAA 0 124 | terminal_mismatch_CUAC 0 125 | terminal_mismatch_CUAG 0 126 | terminal_mismatch_CUAU 0 127 | terminal_mismatch_CUCA 0 128 | terminal_mismatch_CUCC 0 129 | terminal_mismatch_CUCG 0 130 | terminal_mismatch_CUCU 0 131 | terminal_mismatch_CUGA 0 132 | terminal_mismatch_CUGC 0 133 | terminal_mismatch_CUGG 0 134 | terminal_mismatch_CUGU 0 135 | terminal_mismatch_CUUA 0 136 | terminal_mismatch_CUUC 0 137 | terminal_mismatch_CUUG 0 138 | terminal_mismatch_CUUU 0 139 | terminal_mismatch_GAAA 0 140 | terminal_mismatch_GAAC 0 141 | terminal_mismatch_GAAG 0 142 | terminal_mismatch_GAAU 0 143 | terminal_mismatch_GACA 0 144 | terminal_mismatch_GACC 0 145 | terminal_mismatch_GACG 0 146 | terminal_mismatch_GACU 0 147 | terminal_mismatch_GAGA 0 148 | terminal_mismatch_GAGC 0 149 | terminal_mismatch_GAGG 0 150 | terminal_mismatch_GAGU 0 151 | terminal_mismatch_GAUA 0 152 | terminal_mismatch_GAUC 0 153 | terminal_mismatch_GAUG 0 154 | terminal_mismatch_GAUU 0 155 | terminal_mismatch_GCAA -0.1703136025 156 | terminal_mismatch_GCAC -0.09154056357 157 | terminal_mismatch_GCAG -0.2522413002 158 | terminal_mismatch_GCAU -0.8520314799 159 | terminal_mismatch_GCCA 0.04763224188 160 | terminal_mismatch_GCCC -0.2428654283 161 | terminal_mismatch_GCCG -0.2079275061 162 | terminal_mismatch_GCCU -0.1874270053 163 | terminal_mismatch_GCGA 0.6540033983 164 | terminal_mismatch_GCGC -0.7823988605 165 | terminal_mismatch_GCGG 0.1995898255 166 | 
terminal_mismatch_GCGU -0.4432169392 167 | terminal_mismatch_GCUA -0.1736921762 168 | terminal_mismatch_GCUC 0.288494362 169 | terminal_mismatch_GCUG -0.01638238057 170 | terminal_mismatch_GCUU 0.6757988971 171 | terminal_mismatch_GGAA 0 172 | terminal_mismatch_GGAC 0 173 | terminal_mismatch_GGAG 0 174 | terminal_mismatch_GGAU 0 175 | terminal_mismatch_GGCA 0 176 | terminal_mismatch_GGCC 0 177 | terminal_mismatch_GGCG 0 178 | terminal_mismatch_GGCU 0 179 | terminal_mismatch_GGGA 0 180 | terminal_mismatch_GGGC 0 181 | terminal_mismatch_GGGG 0 182 | terminal_mismatch_GGGU 0 183 | terminal_mismatch_GGUA 0 184 | terminal_mismatch_GGUC 0 185 | terminal_mismatch_GGUG 0 186 | terminal_mismatch_GGUU 0 187 | terminal_mismatch_GUAA -0.4871607613 188 | terminal_mismatch_GUAC 0.1105031953 189 | terminal_mismatch_GUAG 0.363373916 190 | terminal_mismatch_GUAU -0.6193199348 191 | terminal_mismatch_GUCA 0.3451056056 192 | terminal_mismatch_GUCC 0.0314944976 193 | terminal_mismatch_GUCG -0.3799172956 194 | terminal_mismatch_GUCU -0.03222973182 195 | terminal_mismatch_GUGA 0.4948638637 196 | terminal_mismatch_GUGC -0.2821952552 197 | terminal_mismatch_GUGG -0.2702227211 198 | terminal_mismatch_GUGU -0.06658395291 199 | terminal_mismatch_GUUA -0.4306154451 200 | terminal_mismatch_GUUC -0.09497863465 201 | terminal_mismatch_GUUG -0.3130794485 202 | terminal_mismatch_GUUU -0.2283242981 203 | terminal_mismatch_UAAA 0.0115363879 204 | terminal_mismatch_UAAC -0.3923408221 205 | terminal_mismatch_UAAG 0.05661063599 206 | terminal_mismatch_UAAU -0.1251485388 207 | terminal_mismatch_UACA -0.06545074758 208 | terminal_mismatch_UACC -0.3167200568 209 | terminal_mismatch_UACG 0.002258383981 210 | terminal_mismatch_UACU -0.422217724 211 | terminal_mismatch_UAGA 0.5458416646 212 | terminal_mismatch_UAGC -0.2085887954 213 | terminal_mismatch_UAGG -0.1971766062 214 | terminal_mismatch_UAGU -0.4722410132 215 | terminal_mismatch_UAUA -0.1779642496 216 | terminal_mismatch_UAUC 0.1643454344 217 | 
terminal_mismatch_UAUG -0.5005617032 218 | terminal_mismatch_UAUU 0.1333867679 219 | terminal_mismatch_UCAA 0 220 | terminal_mismatch_UCAC 0 221 | terminal_mismatch_UCAG 0 222 | terminal_mismatch_UCAU 0 223 | terminal_mismatch_UCCA 0 224 | terminal_mismatch_UCCC 0 225 | terminal_mismatch_UCCG 0 226 | terminal_mismatch_UCCU 0 227 | terminal_mismatch_UCGA 0 228 | terminal_mismatch_UCGC 0 229 | terminal_mismatch_UCGG 0 230 | terminal_mismatch_UCGU 0 231 | terminal_mismatch_UCUA 0 232 | terminal_mismatch_UCUC 0 233 | terminal_mismatch_UCUG 0 234 | terminal_mismatch_UCUU 0 235 | terminal_mismatch_UGAA 0.1218741278 236 | terminal_mismatch_UGAC 0.1990260141 237 | terminal_mismatch_UGAG 0.04681893928 238 | terminal_mismatch_UGAU 0.3256264491 239 | terminal_mismatch_UGCA 0.1186812326 240 | terminal_mismatch_UGCC -0.1851065102 241 | terminal_mismatch_UGCG -0.04311512683 242 | terminal_mismatch_UGCU -0.6150608139 243 | terminal_mismatch_UGGA 0.754933218 244 | terminal_mismatch_UGGC -0.3150708483 245 | terminal_mismatch_UGGG 0.1569582926 246 | terminal_mismatch_UGGU -0.514970007 247 | terminal_mismatch_UGUA -0.2926246029 248 | terminal_mismatch_UGUC 0.1373068149 249 | terminal_mismatch_UGUG -0.05422333363 250 | terminal_mismatch_UGUU 0.03086776921 251 | terminal_mismatch_UUAA 0 252 | terminal_mismatch_UUAC 0 253 | terminal_mismatch_UUAG 0 254 | terminal_mismatch_UUAU 0 255 | terminal_mismatch_UUCA 0 256 | terminal_mismatch_UUCC 0 257 | terminal_mismatch_UUCG 0 258 | terminal_mismatch_UUCU 0 259 | terminal_mismatch_UUGA 0 260 | terminal_mismatch_UUGC 0 261 | terminal_mismatch_UUGG 0 262 | terminal_mismatch_UUGU 0 263 | terminal_mismatch_UUUA 0 264 | terminal_mismatch_UUUC 0 265 | terminal_mismatch_UUUG 0 266 | terminal_mismatch_UUUU 0 267 | hairpin_length_at_least_0 -5.993180158 268 | hairpin_length_at_least_1 -3.108105762 269 | hairpin_length_at_least_2 0.4168976347 270 | hairpin_length_at_least_3 2.205419066 271 | hairpin_length_at_least_4 1.926749692 272 | 
hairpin_length_at_least_5 -0.5873245329 273 | hairpin_length_at_least_6 -0.0827571778 274 | hairpin_length_at_least_7 0.5783889844 275 | hairpin_length_at_least_8 -0.7220883372 276 | hairpin_length_at_least_9 -0.1725874624 277 | hairpin_length_at_least_10 -0.3025089867 278 | hairpin_length_at_least_11 -0.0296315939 279 | hairpin_length_at_least_12 -0.9268995948 280 | hairpin_length_at_least_13 -0.03157753978 281 | hairpin_length_at_least_14 -0.1022472101 282 | hairpin_length_at_least_15 0.1901407346 283 | hairpin_length_at_least_16 -0.09280909826 284 | hairpin_length_at_least_17 0.1690448408 285 | hairpin_length_at_least_18 -0.08172566471 286 | hairpin_length_at_least_19 -0.3445939031 287 | hairpin_length_at_least_20 -0.109150294 288 | hairpin_length_at_least_21 -0.2903523693 289 | hairpin_length_at_least_22 -0.3393713667 290 | hairpin_length_at_least_23 -0.1915364117 291 | hairpin_length_at_least_24 -0.05019209379 292 | hairpin_length_at_least_25 -0.03874620924 293 | hairpin_length_at_least_26 0.04751470752 294 | hairpin_length_at_least_27 0.06744321926 295 | hairpin_length_at_least_28 0.09721875726 296 | hairpin_length_at_least_29 0.1673131733 297 | hairpin_length_at_least_30 0.2329937249 298 | internal_explicit_1_1 -0.1754591076 299 | internal_explicit_1_2 0.03083787104 300 | internal_explicit_1_3 -0.171565435 301 | internal_explicit_1_4 -0.2294680983 302 | internal_explicit_2_2 -0.1304072693 303 | internal_explicit_2_3 -0.07730329553 304 | internal_explicit_2_4 0.2782767264 305 | internal_explicit_3_3 -0.02898949617 306 | internal_explicit_3_4 0.3112350694 307 | internal_explicit_4_4 -0.3226348245 308 | bulge_length_at_least_1 -2.399548472 309 | bulge_length_at_least_2 -0.8945183117 310 | bulge_length_at_least_3 -0.9088550909 311 | bulge_length_at_least_4 -0.8412474755 312 | bulge_length_at_least_5 -0.4365479343 313 | bulge_length_at_least_6 -0.5699187801 314 | bulge_length_at_least_7 0.2002834224 315 | bulge_length_at_least_8 0.7538761358 316 | 
bulge_length_at_least_9 -0.6045045455 317 | bulge_length_at_least_10 -0.7200948098 318 | bulge_length_at_least_11 -0.5136721921 319 | bulge_length_at_least_12 -0.3614726679 320 | bulge_length_at_least_13 -0.2614454392 321 | bulge_length_at_least_14 -0.1593926893 322 | bulge_length_at_least_15 -0.08624668281 323 | bulge_length_at_least_16 -0.03107090996 324 | bulge_length_at_least_17 -0.01097222032 325 | bulge_length_at_least_18 0.03001220283 326 | bulge_length_at_least_19 0.04759123789 327 | bulge_length_at_least_20 -0.04296172065 328 | bulge_length_at_least_21 -0.01791899662 329 | bulge_length_at_least_22 -0.07800551522 330 | bulge_length_at_least_23 -0.0709932643 331 | bulge_length_at_least_24 -0.05767952896 332 | bulge_length_at_least_25 -0.04633794681 333 | bulge_length_at_least_26 -0.03559420456 334 | bulge_length_at_least_27 -0.02674934394 335 | bulge_length_at_least_28 -0.01818957972 336 | bulge_length_at_least_29 -0.01052300732 337 | bulge_length_at_least_30 -0.005153626846 338 | internal_length_at_least_2 -0.429061443 339 | internal_length_at_least_3 -0.3532111501 340 | internal_length_at_least_4 -0.3963797535 341 | internal_length_at_least_5 -0.3111199175 342 | internal_length_at_least_6 -0.2551945472 343 | internal_length_at_least_7 -0.05149116898 344 | internal_length_at_least_8 -0.04319002407 345 | internal_length_at_least_9 0.001985489485 346 | internal_length_at_least_10 -0.1761513136 347 | internal_length_at_least_11 -0.2639686207 348 | internal_length_at_least_12 -0.3460613577 349 | internal_length_at_least_13 -0.2926603079 350 | internal_length_at_least_14 -0.03624250307 351 | internal_length_at_least_15 -0.1199953761 352 | internal_length_at_least_16 -0.04354771926 353 | internal_length_at_least_17 -0.08209293135 354 | internal_length_at_least_18 -0.007113226038 355 | internal_length_at_least_19 0.02354824852 356 | internal_length_at_least_20 0.03066973571 357 | internal_length_at_least_21 -0.06618241094 358 | internal_length_at_least_22 
-0.1316092383 359 | internal_length_at_least_23 -0.1407995514 360 | internal_length_at_least_24 -0.06600291862 361 | internal_length_at_least_25 -0.07779204744 362 | internal_length_at_least_26 -0.05084201265 363 | internal_length_at_least_27 -0.04139875601 364 | internal_length_at_least_28 0.003276583405 365 | internal_length_at_least_29 0.00592458284 366 | internal_length_at_least_30 0.006875738004 367 | internal_symmetric_length_at_least_1 -0.5467082599 368 | internal_symmetric_length_at_least_2 -0.3854701647 369 | internal_symmetric_length_at_least_3 -0.2588466401 370 | internal_symmetric_length_at_least_4 -0.2340836745 371 | internal_symmetric_length_at_least_5 0.1450577765 372 | internal_symmetric_length_at_least_6 -0.6562932515 373 | internal_symmetric_length_at_least_7 -0.3021088369 374 | internal_symmetric_length_at_least_8 -0.03032275267 375 | internal_symmetric_length_at_least_9 -0.3517944058 376 | internal_symmetric_length_at_least_10 -0.2159132506 377 | internal_symmetric_length_at_least_11 -0.1228270454 378 | internal_symmetric_length_at_least_12 -0.1552208595 379 | internal_symmetric_length_at_least_13 -0.08541120743 380 | internal_symmetric_length_at_least_14 -0.04592109799 381 | internal_symmetric_length_at_least_15 -0.02232234236 382 | internal_asymmetry_at_least_1 -2.105646719 383 | internal_asymmetry_at_least_2 -0.5520140431 384 | internal_asymmetry_at_least_3 -0.577070767 385 | internal_asymmetry_at_least_4 -0.6136667847 386 | internal_asymmetry_at_least_5 -0.3057156841 387 | internal_asymmetry_at_least_6 -0.1155052001 388 | internal_asymmetry_at_least_7 -0.2105612231 389 | internal_asymmetry_at_least_8 -0.314574313 390 | internal_asymmetry_at_least_9 -0.3148961681 391 | internal_asymmetry_at_least_10 -0.09018189492 392 | internal_asymmetry_at_least_11 -0.2200026794 393 | internal_asymmetry_at_least_12 -0.1406483243 394 | internal_asymmetry_at_least_13 -0.2162411259 395 | internal_asymmetry_at_least_14 -0.1725531435 396 | 
internal_asymmetry_at_least_15 -0.1558911866 397 | internal_asymmetry_at_least_16 -0.1040858663 398 | internal_asymmetry_at_least_17 -0.06967684228 399 | internal_asymmetry_at_least_18 -0.04105977494 400 | internal_asymmetry_at_least_19 -0.01570624316 401 | internal_asymmetry_at_least_20 0.01382000639 402 | internal_asymmetry_at_least_21 0.04131988563 403 | internal_asymmetry_at_least_22 0.0359418595 404 | internal_asymmetry_at_least_23 0.02822186282 405 | internal_asymmetry_at_least_24 0.01636585874 406 | internal_asymmetry_at_least_25 0.02550056175 407 | internal_asymmetry_at_least_26 0.03348032793 408 | internal_asymmetry_at_least_27 0.03971924412 409 | internal_asymmetry_at_least_28 -0.002545113932 410 | bulge_0x1_nucleotides_A -0.1216861662 411 | bulge_0x1_nucleotides_C -0.07111241127 412 | bulge_0x1_nucleotides_G 0.008947026647 413 | bulge_0x1_nucleotides_U -0.002685763742 414 | internal_1x1_nucleotides_AA 0.2944404686 415 | internal_1x1_nucleotides_AC 0.08641360967 416 | internal_1x1_nucleotides_AG -0.3664197228 417 | internal_1x1_nucleotides_AU -0.2053107048 418 | internal_1x1_nucleotides_CC -0.1582543624 419 | internal_1x1_nucleotides_CG 0.4175273724 420 | internal_1x1_nucleotides_CU 0.1368762582 421 | internal_1x1_nucleotides_GG -0.1193514754 422 | internal_1x1_nucleotides_GU -0.4188101413 423 | internal_1x1_nucleotides_UU 0.147140653 424 | helix_stacking_AAAA 0 425 | helix_stacking_AAAC 0 426 | helix_stacking_AAAG 0 427 | helix_stacking_AAAU 0 428 | helix_stacking_AACA 0 429 | helix_stacking_AACC 0 430 | helix_stacking_AACG 0 431 | helix_stacking_AACU 0 432 | helix_stacking_AAGA 0 433 | helix_stacking_AAGC 0 434 | helix_stacking_AAGG 0 435 | helix_stacking_AAGU 0 436 | helix_stacking_AAUA 0 437 | helix_stacking_AAUC 0 438 | helix_stacking_AAUG 0 439 | helix_stacking_AAUU 0 440 | helix_stacking_ACAC 0 441 | helix_stacking_ACAG 0 442 | helix_stacking_ACAU 0 443 | helix_stacking_ACCA 0 444 | helix_stacking_ACCC 0 445 | helix_stacking_ACCG 0 446 | 
helix_stacking_ACCU 0 447 | helix_stacking_ACGA 0 448 | helix_stacking_ACGC 0 449 | helix_stacking_ACGG 0 450 | helix_stacking_ACGU 0 451 | helix_stacking_ACUA 0 452 | helix_stacking_ACUC 0 453 | helix_stacking_ACUG 0 454 | helix_stacking_ACUU 0 455 | helix_stacking_AGAC 0 456 | helix_stacking_AGAG 0 457 | helix_stacking_AGAU 0 458 | helix_stacking_AGCC 0 459 | helix_stacking_AGCG 0 460 | helix_stacking_AGCU 0 461 | helix_stacking_AGGA 0 462 | helix_stacking_AGGC 0 463 | helix_stacking_AGGG 0 464 | helix_stacking_AGGU 0 465 | helix_stacking_AGUA 0 466 | helix_stacking_AGUC 0 467 | helix_stacking_AGUG 0 468 | helix_stacking_AGUU 0 469 | helix_stacking_AUAC 0 470 | helix_stacking_AUAG 0 471 | helix_stacking_AUAU 0.1482005248 472 | helix_stacking_AUCC 0 473 | helix_stacking_AUCG 0.4343497127 474 | helix_stacking_AUCU 0 475 | helix_stacking_AUGC 0.7079642577 476 | helix_stacking_AUGG 0 477 | helix_stacking_AUGU -0.1010777582 478 | helix_stacking_AUUA 0.243256656 479 | helix_stacking_AUUC 0 480 | helix_stacking_AUUG 0.1623654243 481 | helix_stacking_AUUU 0 482 | helix_stacking_CAAC 0 483 | helix_stacking_CAAG 0 484 | helix_stacking_CAAU 0 485 | helix_stacking_CACC 0 486 | helix_stacking_CACG 0 487 | helix_stacking_CACU 0 488 | helix_stacking_CAGC 0 489 | helix_stacking_CAGG 0 490 | helix_stacking_CAGU 0 491 | helix_stacking_CAUC 0 492 | helix_stacking_CAUG 0 493 | helix_stacking_CAUU 0 494 | helix_stacking_CCAG 0 495 | helix_stacking_CCAU 0 496 | helix_stacking_CCCC 0 497 | helix_stacking_CCCG 0 498 | helix_stacking_CCCU 0 499 | helix_stacking_CCGC 0 500 | helix_stacking_CCGG 0 501 | helix_stacking_CCGU 0 502 | helix_stacking_CCUC 0 503 | helix_stacking_CCUG 0 504 | helix_stacking_CCUU 0 505 | helix_stacking_CGAG 0 506 | helix_stacking_CGAU 0.4878707793 507 | helix_stacking_CGCG 0.8481320247 508 | helix_stacking_CGCU 0 509 | helix_stacking_CGGC 0.4784248478 510 | helix_stacking_CGGG 0 511 | helix_stacking_CGGU -0.1811268205 512 | helix_stacking_CGUC 0 513 | 
helix_stacking_CGUG 0.4849351028 514 | helix_stacking_CGUU 0 515 | helix_stacking_CUAG 0 516 | helix_stacking_CUAU 0 517 | helix_stacking_CUCG 0 518 | helix_stacking_CUCU 0 519 | helix_stacking_CUGG 0 520 | helix_stacking_CUGU 0 521 | helix_stacking_CUUC 0 522 | helix_stacking_CUUG 0 523 | helix_stacking_CUUU 0 524 | helix_stacking_GAAG 0 525 | helix_stacking_GAAU 0 526 | helix_stacking_GACG 0 527 | helix_stacking_GACU 0 528 | helix_stacking_GAGG 0 529 | helix_stacking_GAGU 0 530 | helix_stacking_GAUG 0 531 | helix_stacking_GAUU 0 532 | helix_stacking_GCAU 0.5551785831 533 | helix_stacking_GCCG 0.5008324248 534 | helix_stacking_GCCU 0 535 | helix_stacking_GCGG 0 536 | helix_stacking_GCGU 0.2165962476 537 | helix_stacking_GCUG 0.4864603589 538 | helix_stacking_GCUU 0 539 | helix_stacking_GGAU 0 540 | helix_stacking_GGCU 0 541 | helix_stacking_GGGG 0 542 | helix_stacking_GGGU 0 543 | helix_stacking_GGUG 0 544 | helix_stacking_GGUU 0 545 | helix_stacking_GUAU -0.04665365028 546 | helix_stacking_GUCU 0 547 | helix_stacking_GUGU 0.1833447295 548 | helix_stacking_GUUG -0.2858970755 549 | helix_stacking_GUUU 0 550 | helix_stacking_UAAU 0.3897593783 551 | helix_stacking_UACU 0 552 | helix_stacking_UAGU -0.1157333764 553 | helix_stacking_UAUU 0 554 | helix_stacking_UCCU 0 555 | helix_stacking_UCGU 0 556 | helix_stacking_UCUU 0 557 | helix_stacking_UGGU 0.120296538 558 | helix_stacking_UGUU 0 559 | helix_stacking_UUUU 0 560 | helix_closing_AA 0 561 | helix_closing_AC 0 562 | helix_closing_AG 0 563 | helix_closing_AU -0.9770893163 564 | helix_closing_CA 0 565 | helix_closing_CC 0 566 | helix_closing_CG -0.4574650937 567 | helix_closing_CU 0 568 | helix_closing_GA 0 569 | helix_closing_GC -0.8265995623 570 | helix_closing_GG 0 571 | helix_closing_GU -1.051678928 572 | helix_closing_UA -0.9246140521 573 | helix_closing_UC 0 574 | helix_closing_UG -0.3698708172 575 | helix_closing_UU 0 576 | multi_base -1.199055076 577 | multi_unpaired -0.1983300391 578 | multi_paired 
-0.9253883752 579 | dangle_left_AAA 0 580 | dangle_left_AAC 0 581 | dangle_left_AAG 0 582 | dangle_left_AAU 0 583 | dangle_left_ACA 0 584 | dangle_left_ACC 0 585 | dangle_left_ACG 0 586 | dangle_left_ACU 0 587 | dangle_left_AGA 0 588 | dangle_left_AGC 0 589 | dangle_left_AGG 0 590 | dangle_left_AGU 0 591 | dangle_left_AUA -0.1251037681 592 | dangle_left_AUC 0.0441606708 593 | dangle_left_AUG -0.02541879082 594 | dangle_left_AUU 0.00785098466 595 | dangle_left_CAA 0 596 | dangle_left_CAC 0 597 | dangle_left_CAG 0 598 | dangle_left_CAU 0 599 | dangle_left_CCA 0 600 | dangle_left_CCC 0 601 | dangle_left_CCG 0 602 | dangle_left_CCU 0 603 | dangle_left_CGA 0.07224381372 604 | dangle_left_CGC 0.05279281874 605 | dangle_left_CGG 0.1009554299 606 | dangle_left_CGU -0.1515059013 607 | dangle_left_CUA 0 608 | dangle_left_CUC 0 609 | dangle_left_CUG 0 610 | dangle_left_CUU 0 611 | dangle_left_GAA 0 612 | dangle_left_GAC 0 613 | dangle_left_GAG 0 614 | dangle_left_GAU 0 615 | dangle_left_GCA -0.1829535099 616 | dangle_left_GCC 0.03393000394 617 | dangle_left_GCG 0.1335339061 618 | dangle_left_GCU -0.1604274506 619 | dangle_left_GGA 0 620 | dangle_left_GGC 0 621 | dangle_left_GGG 0 622 | dangle_left_GGU 0 623 | dangle_left_GUA -0.06517511341 624 | dangle_left_GUC -0.04250882422 625 | dangle_left_GUG 0.02875971806 626 | dangle_left_GUU -0.04359727428 627 | dangle_left_UAA -0.03373847659 628 | dangle_left_UAC -0.005070324324 629 | dangle_left_UAG -0.1186861149 630 | dangle_left_UAU -0.01162357727 631 | dangle_left_UCA 0 632 | dangle_left_UCC 0 633 | dangle_left_UCG 0 634 | dangle_left_UCU 0 635 | dangle_left_UGA -0.08047139148 636 | dangle_left_UGC 0.001608000669 637 | dangle_left_UGG 0.1016272216 638 | dangle_left_UGU -0.09200842832 639 | dangle_left_UUA 0 640 | dangle_left_UUC 0 641 | dangle_left_UUG 0 642 | dangle_left_UUU 0 643 | dangle_right_AAA 0 644 | dangle_right_AAC 0 645 | dangle_right_AAG 0 646 | dangle_right_AAU 0 647 | dangle_right_ACA 0 648 | dangle_right_ACC 0 649 
| dangle_right_ACG 0 650 | dangle_right_ACU 0 651 | dangle_right_AGA 0 652 | dangle_right_AGC 0 653 | dangle_right_AGG 0 654 | dangle_right_AGU 0 655 | dangle_right_AUA 0.03232578201 656 | dangle_right_AUC -0.09096819493 657 | dangle_right_AUG -0.0740750973 658 | dangle_right_AUU -0.01621157379 659 | dangle_right_CAA 0 660 | dangle_right_CAC 0 661 | dangle_right_CAG 0 662 | dangle_right_CAU 0 663 | dangle_right_CCA 0 664 | dangle_right_CCC 0 665 | dangle_right_CCG 0 666 | dangle_right_CCU 0 667 | dangle_right_CGA 0.2133964379 668 | dangle_right_CGC -0.06234810991 669 | dangle_right_CGG -0.07008531041 670 | dangle_right_CGU -0.2141912285 671 | dangle_right_CUA 0 672 | dangle_right_CUC 0 673 | dangle_right_CUG 0 674 | dangle_right_CUU 0 675 | dangle_right_GAA 0 676 | dangle_right_GAC 0 677 | dangle_right_GAG 0 678 | dangle_right_GAU 0 679 | dangle_right_GCA 0.01581957549 680 | dangle_right_GCC 0.005644320058 681 | dangle_right_GCG -0.00943297687 682 | dangle_right_GCU -0.2597793095 683 | dangle_right_GGA 0 684 | dangle_right_GGC 0 685 | dangle_right_GGG 0 686 | dangle_right_GGU 0 687 | dangle_right_GUA -0.04480271781 688 | dangle_right_GUC -0.07321213002 689 | dangle_right_GUG 0.01270494867 690 | dangle_right_GUU -0.05717033985 691 | dangle_right_UAA -0.1631918513 692 | dangle_right_UAC 0.06769304994 693 | dangle_right_UAG -0.08789074414 694 | dangle_right_UAU -0.05525570007 695 | dangle_right_UCA 0 696 | dangle_right_UCC 0 697 | dangle_right_UCG 0 698 | dangle_right_UCU 0 699 | dangle_right_UGA 0.04105458185 700 | dangle_right_UGC -0.008136642572 701 | dangle_right_UGG -0.03808592022 702 | dangle_right_UGU -0.08629373429 703 | dangle_right_UUA 0 704 | dangle_right_UUC 0 705 | dangle_right_UUG 0 706 | dangle_right_UUU 0 707 | external_unpaired -0.00972883093 708 | external_paired -0.0009674111431 709 | -------------------------------------------------------------------------------- /parameter_files/learntofold.contrafold.params: 
-------------------------------------------------------------------------------- 1 | base_pair_AA 0 2 | base_pair_AC 0 3 | base_pair_AG 0 4 | base_pair_AU 0.117196 5 | base_pair_CC 0 6 | base_pair_CG 0.42785 7 | base_pair_CU 0 8 | base_pair_GG 0 9 | base_pair_GU -0.144535 10 | base_pair_UU 0 11 | terminal_mismatch_AAAA 0 12 | terminal_mismatch_AAAC 0 13 | terminal_mismatch_AAAG 0 14 | terminal_mismatch_AAAU 0 15 | terminal_mismatch_AACA 0 16 | terminal_mismatch_AACC 0 17 | terminal_mismatch_AACG 0 18 | terminal_mismatch_AACU 0 19 | terminal_mismatch_AAGA 0 20 | terminal_mismatch_AAGC 0 21 | terminal_mismatch_AAGG 0 22 | terminal_mismatch_AAGU 0 23 | terminal_mismatch_AAUA 0 24 | terminal_mismatch_AAUC 0 25 | terminal_mismatch_AAUG 0 26 | terminal_mismatch_AAUU 0 27 | terminal_mismatch_ACAA 0 28 | terminal_mismatch_ACAC 0 29 | terminal_mismatch_ACAG 0 30 | terminal_mismatch_ACAU 0 31 | terminal_mismatch_ACCA 0 32 | terminal_mismatch_ACCC 0 33 | terminal_mismatch_ACCG 0 34 | terminal_mismatch_ACCU 0 35 | terminal_mismatch_ACGA 0 36 | terminal_mismatch_ACGC 0 37 | terminal_mismatch_ACGG 0 38 | terminal_mismatch_ACGU 0 39 | terminal_mismatch_ACUA 0 40 | terminal_mismatch_ACUC 0 41 | terminal_mismatch_ACUG 0 42 | terminal_mismatch_ACUU 0 43 | terminal_mismatch_AGAA 0 44 | terminal_mismatch_AGAC 0 45 | terminal_mismatch_AGAG 0 46 | terminal_mismatch_AGAU 0 47 | terminal_mismatch_AGCA 0 48 | terminal_mismatch_AGCC 0 49 | terminal_mismatch_AGCG 0 50 | terminal_mismatch_AGCU 0 51 | terminal_mismatch_AGGA 0 52 | terminal_mismatch_AGGC 0 53 | terminal_mismatch_AGGG 0 54 | terminal_mismatch_AGGU 0 55 | terminal_mismatch_AGUA 0 56 | terminal_mismatch_AGUC 0 57 | terminal_mismatch_AGUG 0 58 | terminal_mismatch_AGUU 0 59 | terminal_mismatch_AUAA -0.168158 60 | terminal_mismatch_AUAC -0.242468 61 | terminal_mismatch_AUAG -0.171538 62 | terminal_mismatch_AUAU 0.063824 63 | terminal_mismatch_AUCA -0.136324 64 | terminal_mismatch_AUCC 0.0340154 65 | terminal_mismatch_AUCG 0.412095 66 
| terminal_mismatch_AUCU -0.158066 67 | terminal_mismatch_AUGA 0.235308 68 | terminal_mismatch_AUGC 0.446161 69 | terminal_mismatch_AUGG -0.31236 70 | terminal_mismatch_AUGU -0.174198 71 | terminal_mismatch_AUUA 0.427164 72 | terminal_mismatch_AUUC 0.351693 73 | terminal_mismatch_AUUG 0.112834 74 | terminal_mismatch_AUUU 0.0114197 75 | terminal_mismatch_CAAA 0 76 | terminal_mismatch_CAAC 0 77 | terminal_mismatch_CAAG 0 78 | terminal_mismatch_CAAU 0 79 | terminal_mismatch_CACA 0 80 | terminal_mismatch_CACC 0 81 | terminal_mismatch_CACG 0 82 | terminal_mismatch_CACU 0 83 | terminal_mismatch_CAGA 0 84 | terminal_mismatch_CAGC 0 85 | terminal_mismatch_CAGG 0 86 | terminal_mismatch_CAGU 0 87 | terminal_mismatch_CAUA 0 88 | terminal_mismatch_CAUC 0 89 | terminal_mismatch_CAUG 0 90 | terminal_mismatch_CAUU 0 91 | terminal_mismatch_CCAA 0 92 | terminal_mismatch_CCAC 0 93 | terminal_mismatch_CCAG 0 94 | terminal_mismatch_CCAU 0 95 | terminal_mismatch_CCCA 0 96 | terminal_mismatch_CCCC 0 97 | terminal_mismatch_CCCG 0 98 | terminal_mismatch_CCCU 0 99 | terminal_mismatch_CCGA 0 100 | terminal_mismatch_CCGC 0 101 | terminal_mismatch_CCGG 0 102 | terminal_mismatch_CCGU 0 103 | terminal_mismatch_CCUA 0 104 | terminal_mismatch_CCUC 0 105 | terminal_mismatch_CCUG 0 106 | terminal_mismatch_CCUU 0 107 | terminal_mismatch_CGAA -0.109134 108 | terminal_mismatch_CGAC -0.316447 109 | terminal_mismatch_CGAG -0.62242 110 | terminal_mismatch_CGAU 0.0216624 111 | terminal_mismatch_CGCA 0.0388758 112 | terminal_mismatch_CGCC -0.281257 113 | terminal_mismatch_CGCG 0.241614 114 | terminal_mismatch_CGCU -0.397997 115 | terminal_mismatch_CGGA 0.327717 116 | terminal_mismatch_CGGC 0.110783 117 | terminal_mismatch_CGGG -0.527171 118 | terminal_mismatch_CGGU -0.429919 119 | terminal_mismatch_CGUA 0.171414 120 | terminal_mismatch_CGUC -0.279608 121 | terminal_mismatch_CGUG 0.100497 122 | terminal_mismatch_CGUU -0.248438 123 | terminal_mismatch_CUAA 0 124 | terminal_mismatch_CUAC 0 125 | 
terminal_mismatch_CUAG 0 126 | terminal_mismatch_CUAU 0 127 | terminal_mismatch_CUCA 0 128 | terminal_mismatch_CUCC 0 129 | terminal_mismatch_CUCG 0 130 | terminal_mismatch_CUCU 0 131 | terminal_mismatch_CUGA 0 132 | terminal_mismatch_CUGC 0 133 | terminal_mismatch_CUGG 0 134 | terminal_mismatch_CUGU 0 135 | terminal_mismatch_CUUA 0 136 | terminal_mismatch_CUUC 0 137 | terminal_mismatch_CUUG 0 138 | terminal_mismatch_CUUU 0 139 | terminal_mismatch_GAAA 0 140 | terminal_mismatch_GAAC 0 141 | terminal_mismatch_GAAG 0 142 | terminal_mismatch_GAAU 0 143 | terminal_mismatch_GACA 0 144 | terminal_mismatch_GACC 0 145 | terminal_mismatch_GACG 0 146 | terminal_mismatch_GACU 0 147 | terminal_mismatch_GAGA 0 148 | terminal_mismatch_GAGC 0 149 | terminal_mismatch_GAGG 0 150 | terminal_mismatch_GAGU 0 151 | terminal_mismatch_GAUA 0 152 | terminal_mismatch_GAUC 0 153 | terminal_mismatch_GAUG 0 154 | terminal_mismatch_GAUU 0 155 | terminal_mismatch_GCAA -0.566345 156 | terminal_mismatch_GCAC -0.0306717 157 | terminal_mismatch_GCAG -0.266614 158 | terminal_mismatch_GCAU -0.154598 159 | terminal_mismatch_GCCA -0.316693 160 | terminal_mismatch_GCCC -0.131361 161 | terminal_mismatch_GCCG 0.363139 162 | terminal_mismatch_GCCU -0.41638 163 | terminal_mismatch_GCGA 0.353058 164 | terminal_mismatch_GCGC 0.368934 165 | terminal_mismatch_GCGG -0.0630469 166 | terminal_mismatch_GCGU -0.255096 167 | terminal_mismatch_GCUA 0.0728846 168 | terminal_mismatch_GCUC -0.0480102 169 | terminal_mismatch_GCUG 0.374379 170 | terminal_mismatch_GCUU 0.0624913 171 | terminal_mismatch_GGAA 0 172 | terminal_mismatch_GGAC 0 173 | terminal_mismatch_GGAG 0 174 | terminal_mismatch_GGAU 0 175 | terminal_mismatch_GGCA 0 176 | terminal_mismatch_GGCC 0 177 | terminal_mismatch_GGCG 0 178 | terminal_mismatch_GGCU 0 179 | terminal_mismatch_GGGA 0 180 | terminal_mismatch_GGGC 0 181 | terminal_mismatch_GGGG 0 182 | terminal_mismatch_GGGU 0 183 | terminal_mismatch_GGUA 0 184 | terminal_mismatch_GGUC 0 185 | 
terminal_mismatch_GGUG 0 186 | terminal_mismatch_GGUU 0 187 | terminal_mismatch_GUAA -0.22414 188 | terminal_mismatch_GUAC -0.133311 189 | terminal_mismatch_GUAG -0.359489 190 | terminal_mismatch_GUAU -0.330393 191 | terminal_mismatch_GUCA 0.0365249 192 | terminal_mismatch_GUCC 0.0615222 193 | terminal_mismatch_GUCG 0.290182 194 | terminal_mismatch_GUCU -0.176866 195 | terminal_mismatch_GUGA -0.19437 196 | terminal_mismatch_GUGC 0.0494159 197 | terminal_mismatch_GUGG -0.203475 198 | terminal_mismatch_GUGU -0.171151 199 | terminal_mismatch_GUUA 0.0401032 200 | terminal_mismatch_GUUC -0.105719 201 | terminal_mismatch_GUUG -0.302561 202 | terminal_mismatch_GUUU -0.445895 203 | terminal_mismatch_UAAA -0.586072 204 | terminal_mismatch_UAAC -0.61438 205 | terminal_mismatch_UAAG -0.405239 206 | terminal_mismatch_UAAU -0.133188 207 | terminal_mismatch_UACA -0.616378 208 | terminal_mismatch_UACC -0.624385 209 | terminal_mismatch_UACG -0.258873 210 | terminal_mismatch_UACU -0.681676 211 | terminal_mismatch_UAGA -0.342396 212 | terminal_mismatch_UAGC 0.239263 213 | terminal_mismatch_UAGG -0.667443 214 | terminal_mismatch_UAGU -0.766636 215 | terminal_mismatch_UAUA -0.308715 216 | terminal_mismatch_UAUC -0.00697584 217 | terminal_mismatch_UAUG -0.502953 218 | terminal_mismatch_UAUU -0.528393 219 | terminal_mismatch_UCAA 0 220 | terminal_mismatch_UCAC 0 221 | terminal_mismatch_UCAG 0 222 | terminal_mismatch_UCAU 0 223 | terminal_mismatch_UCCA 0 224 | terminal_mismatch_UCCC 0 225 | terminal_mismatch_UCCG 0 226 | terminal_mismatch_UCCU 0 227 | terminal_mismatch_UCGA 0 228 | terminal_mismatch_UCGC 0 229 | terminal_mismatch_UCGG 0 230 | terminal_mismatch_UCGU 0 231 | terminal_mismatch_UCUA 0 232 | terminal_mismatch_UCUC 0 233 | terminal_mismatch_UCUG 0 234 | terminal_mismatch_UCUU 0 235 | terminal_mismatch_UGAA 0.0753088 236 | terminal_mismatch_UGAC 0.27512 237 | terminal_mismatch_UGAG -0.050858 238 | terminal_mismatch_UGAU 0.192983 239 | terminal_mismatch_UGCA 0.443018 240 | 
terminal_mismatch_UGCC 0.0480001 241 | terminal_mismatch_UGCG 0.497822 242 | terminal_mismatch_UGCU 0.157055 243 | terminal_mismatch_UGGA 0.836611 244 | terminal_mismatch_UGGC 0.282301 245 | terminal_mismatch_UGGG 0.0988858 246 | terminal_mismatch_UGGU 0.234094 247 | terminal_mismatch_UGUA 0.114609 248 | terminal_mismatch_UGUC 0.214683 249 | terminal_mismatch_UGUG 0.246988 250 | terminal_mismatch_UGUU 0.371336 251 | terminal_mismatch_UUAA 0 252 | terminal_mismatch_UUAC 0 253 | terminal_mismatch_UUAG 0 254 | terminal_mismatch_UUAU 0 255 | terminal_mismatch_UUCA 0 256 | terminal_mismatch_UUCC 0 257 | terminal_mismatch_UUCG 0 258 | terminal_mismatch_UUCU 0 259 | terminal_mismatch_UUGA 0 260 | terminal_mismatch_UUGC 0 261 | terminal_mismatch_UUGG 0 262 | terminal_mismatch_UUGU 0 263 | terminal_mismatch_UUUA 0 264 | terminal_mismatch_UUUC 0 265 | terminal_mismatch_UUUG 0 266 | terminal_mismatch_UUUU 0 267 | hairpin_length_at_least_0 -1.84406 268 | hairpin_length_at_least_1 0.38098 269 | hairpin_length_at_least_2 0.859909 270 | hairpin_length_at_least_3 0.295419 271 | hairpin_length_at_least_4 0.7661 272 | hairpin_length_at_least_5 -0.338749 273 | hairpin_length_at_least_6 -0.0639211 274 | hairpin_length_at_least_7 0.315558 275 | hairpin_length_at_least_8 -0.362892 276 | hairpin_length_at_least_9 -0.176655 277 | hairpin_length_at_least_10 -0.263635 278 | hairpin_length_at_least_11 -0.129676 279 | hairpin_length_at_least_12 0.105682 280 | hairpin_length_at_least_13 0.08146 281 | hairpin_length_at_least_14 -0.855376 282 | hairpin_length_at_least_15 -0.0377099 283 | hairpin_length_at_least_16 0.0421525 284 | hairpin_length_at_least_17 0.107804 285 | hairpin_length_at_least_18 -0.216865 286 | hairpin_length_at_least_19 -0.0181023 287 | hairpin_length_at_least_20 -0.225869 288 | hairpin_length_at_least_21 -0.181939 289 | hairpin_length_at_least_22 0.0310624 290 | hairpin_length_at_least_23 -0.0905128 291 | hairpin_length_at_least_24 -0.306419 292 | hairpin_length_at_least_25 
-0.13717 293 | hairpin_length_at_least_26 0.132407 294 | hairpin_length_at_least_27 -0.130469 295 | hairpin_length_at_least_28 -0.0067091 296 | hairpin_length_at_least_29 -0.115291 297 | hairpin_length_at_least_30 -0.39803 298 | internal_explicit_1_1 0.155859 299 | internal_explicit_1_2 -0.121667 300 | internal_explicit_1_3 0.0100364 301 | internal_explicit_1_4 0.199334 302 | internal_explicit_2_2 0.130952 303 | internal_explicit_2_3 -0.187011 304 | internal_explicit_2_4 -0.110813 305 | internal_explicit_3_3 0.0529937 306 | internal_explicit_3_4 -0.357182 307 | internal_explicit_4_4 0.12988 308 | bulge_length_at_least_1 -0.10609 309 | bulge_length_at_least_2 -0.294864 310 | bulge_length_at_least_3 -0.36619 311 | bulge_length_at_least_4 -0.577635 312 | bulge_length_at_least_5 -0.404122 313 | bulge_length_at_least_6 -0.508964 314 | bulge_length_at_least_7 -0.0211596 315 | bulge_length_at_least_8 0.749466 316 | bulge_length_at_least_9 -0.532326 317 | bulge_length_at_least_10 -0.585856 318 | bulge_length_at_least_11 -0.356308 319 | bulge_length_at_least_12 0.119846 320 | bulge_length_at_least_13 0.25548 321 | bulge_length_at_least_14 0.146516 322 | bulge_length_at_least_15 -0.546997 323 | bulge_length_at_least_16 0.147717 324 | bulge_length_at_least_17 0.0178208 325 | bulge_length_at_least_18 0.0080868 326 | bulge_length_at_least_19 0.456916 327 | bulge_length_at_least_20 -0.42458 328 | bulge_length_at_least_21 0.145037 329 | bulge_length_at_least_22 -0.105019 330 | bulge_length_at_least_23 -0.342105 331 | bulge_length_at_least_24 -0.0779023 332 | bulge_length_at_least_25 -0.193858 333 | bulge_length_at_least_26 -0.00769006 334 | bulge_length_at_least_27 -0.111807 335 | bulge_length_at_least_28 0.155611 336 | bulge_length_at_least_29 0.335468 337 | bulge_length_at_least_30 1.18348 338 | internal_length_at_least_2 0.0141383 339 | internal_length_at_least_3 -0.0934192 340 | internal_length_at_least_4 -0.0617787 341 | internal_length_at_least_5 -0.115015 342 | 
internal_length_at_least_6 -0.100272 343 | internal_length_at_least_7 0.260368 344 | internal_length_at_least_8 -0.258777 345 | internal_length_at_least_9 0.0776641 346 | internal_length_at_least_10 -0.249379 347 | internal_length_at_least_11 0.0528477 348 | internal_length_at_least_12 -0.478489 349 | internal_length_at_least_13 -0.106756 350 | internal_length_at_least_14 -0.000894333 351 | internal_length_at_least_15 -0.334079 352 | internal_length_at_least_16 0.0711885 353 | internal_length_at_least_17 -0.203494 354 | internal_length_at_least_18 0.253692 355 | internal_length_at_least_19 -0.232494 356 | internal_length_at_least_20 0.358359 357 | internal_length_at_least_21 -0.366355 358 | internal_length_at_least_22 0.245564 359 | internal_length_at_least_23 -0.489612 360 | internal_length_at_least_24 0.262947 361 | internal_length_at_least_25 -0.433761 362 | internal_length_at_least_26 0.0245611 363 | internal_length_at_least_27 -0.128352 364 | internal_length_at_least_28 0.100132 365 | internal_length_at_least_29 -0.208747 366 | internal_length_at_least_30 0.827826 367 | internal_symmetric_length_at_least_1 0.0656625 368 | internal_symmetric_length_at_least_2 -0.087095 369 | internal_symmetric_length_at_least_3 -0.0711241 370 | internal_symmetric_length_at_least_4 0.0126792 371 | internal_symmetric_length_at_least_5 -0.233107 372 | internal_symmetric_length_at_least_6 -0.112285 373 | internal_symmetric_length_at_least_7 -0.120892 374 | internal_symmetric_length_at_least_8 0.0783225 375 | internal_symmetric_length_at_least_9 -0.120047 376 | internal_symmetric_length_at_least_10 -0.44724 377 | internal_symmetric_length_at_least_11 -0.0132272 378 | internal_symmetric_length_at_least_12 -0.118194 379 | internal_symmetric_length_at_least_13 0.0859623 380 | internal_symmetric_length_at_least_14 -0.178603 381 | internal_symmetric_length_at_least_15 -0.178603 382 | internal_asymmetry_at_least_1 -0.0748923 383 | internal_asymmetry_at_least_2 -0.382543 384 | 
internal_asymmetry_at_least_3 -0.251796 385 | internal_asymmetry_at_least_4 -0.421874 386 | internal_asymmetry_at_least_5 -0.34332 387 | internal_asymmetry_at_least_6 -0.115644 388 | internal_asymmetry_at_least_7 -0.165334 389 | internal_asymmetry_at_least_8 0.197739 390 | internal_asymmetry_at_least_9 -0.186715 391 | internal_asymmetry_at_least_10 0.076971 392 | internal_asymmetry_at_least_11 0.0362528 393 | internal_asymmetry_at_least_12 -0.220953 394 | internal_asymmetry_at_least_13 0.108824 395 | internal_asymmetry_at_least_14 -0.0164457 396 | internal_asymmetry_at_least_15 0.368713 397 | internal_asymmetry_at_least_16 -0.438663 398 | internal_asymmetry_at_least_17 0.16405 399 | internal_asymmetry_at_least_18 -0.0398533 400 | internal_asymmetry_at_least_19 0.1949 401 | internal_asymmetry_at_least_20 0.0771696 402 | internal_asymmetry_at_least_21 0.41823 403 | internal_asymmetry_at_least_22 -0.632993 404 | internal_asymmetry_at_least_23 -0.116177 405 | internal_asymmetry_at_least_24 -0.12073 406 | internal_asymmetry_at_least_25 0.0344756 407 | internal_asymmetry_at_least_26 -0.0637855 408 | internal_asymmetry_at_least_27 0.264182 409 | internal_asymmetry_at_least_28 0.393391 410 | bulge_0x1_nucleotides_A 0.0265834 411 | bulge_0x1_nucleotides_C 0.187646 412 | bulge_0x1_nucleotides_G 0.213565 413 | bulge_0x1_nucleotides_U 0.139233 414 | internal_1x1_nucleotides_AA 0.115743 415 | internal_1x1_nucleotides_AC 0.0287969 416 | internal_1x1_nucleotides_AG -0.142761 417 | internal_1x1_nucleotides_AU 0.780265 418 | internal_1x1_nucleotides_CC 0.0215604 419 | internal_1x1_nucleotides_CG 0.834524 420 | internal_1x1_nucleotides_CU 0.0301214 421 | internal_1x1_nucleotides_GG 0.220881 422 | internal_1x1_nucleotides_GU 0.608098 423 | internal_1x1_nucleotides_UU 0.161178 424 | helix_stacking_AAAA 0 425 | helix_stacking_AAAC 0 426 | helix_stacking_AAAG 0 427 | helix_stacking_AAAU 0 428 | helix_stacking_AACA 0 429 | helix_stacking_AACC 0 430 | helix_stacking_AACG 0 431 | 
helix_stacking_AACU 0 432 | helix_stacking_AAGA 0 433 | helix_stacking_AAGC 0 434 | helix_stacking_AAGG 0 435 | helix_stacking_AAGU 0 436 | helix_stacking_AAUA 0 437 | helix_stacking_AAUC 0 438 | helix_stacking_AAUG 0 439 | helix_stacking_AAUU 0 440 | helix_stacking_ACAC 0 441 | helix_stacking_ACAG 0 442 | helix_stacking_ACAU 0 443 | helix_stacking_ACCA 0 444 | helix_stacking_ACCC 0 445 | helix_stacking_ACCG 0 446 | helix_stacking_ACCU 0 447 | helix_stacking_ACGA 0 448 | helix_stacking_ACGC 0 449 | helix_stacking_ACGG 0 450 | helix_stacking_ACGU 0 451 | helix_stacking_ACUA 0 452 | helix_stacking_ACUC 0 453 | helix_stacking_ACUG 0 454 | helix_stacking_ACUU 0 455 | helix_stacking_AGAC 0 456 | helix_stacking_AGAG 0 457 | helix_stacking_AGAU 0 458 | helix_stacking_AGCC 0 459 | helix_stacking_AGCG 0 460 | helix_stacking_AGCU 0 461 | helix_stacking_AGGA 0 462 | helix_stacking_AGGC 0 463 | helix_stacking_AGGG 0 464 | helix_stacking_AGGU 0 465 | helix_stacking_AGUA 0 466 | helix_stacking_AGUC 0 467 | helix_stacking_AGUG 0 468 | helix_stacking_AGUU 0 469 | helix_stacking_AUAC 0 470 | helix_stacking_AUAG 0 471 | helix_stacking_AUAU 0.166949 472 | helix_stacking_AUCC 0 473 | helix_stacking_AUCG 0.457814 474 | helix_stacking_AUCU 0 475 | helix_stacking_AUGC 0.625282 476 | helix_stacking_AUGG 0 477 | helix_stacking_AUGU -0.0635901 478 | helix_stacking_AUUA 0.484831 479 | helix_stacking_AUUC 0 480 | helix_stacking_AUUG 0.229207 481 | helix_stacking_AUUU 0 482 | helix_stacking_CAAC 0 483 | helix_stacking_CAAG 0 484 | helix_stacking_CAAU 0 485 | helix_stacking_CACC 0 486 | helix_stacking_CACG 0 487 | helix_stacking_CACU 0 488 | helix_stacking_CAGC 0 489 | helix_stacking_CAGG 0 490 | helix_stacking_CAGU 0 491 | helix_stacking_CAUC 0 492 | helix_stacking_CAUG 0 493 | helix_stacking_CAUU 0 494 | helix_stacking_CCAG 0 495 | helix_stacking_CCAU 0 496 | helix_stacking_CCCC 0 497 | helix_stacking_CCCG 0 498 | helix_stacking_CCCU 0 499 | helix_stacking_CCGC 0 500 | helix_stacking_CCGG 0 
501 | helix_stacking_CCGU 0 502 | helix_stacking_CCUC 0 503 | helix_stacking_CCUG 0 504 | helix_stacking_CCUU 0 505 | helix_stacking_CGAG 0 506 | helix_stacking_CGAU 0.60886 507 | helix_stacking_CGCG 0.927152 508 | helix_stacking_CGCU 0 509 | helix_stacking_CGGC 0.483599 510 | helix_stacking_CGGG 0 511 | helix_stacking_CGGU 0.00568172 512 | helix_stacking_CGUC 0 513 | helix_stacking_CGUG 0.370247 514 | helix_stacking_CGUU 0 515 | helix_stacking_CUAG 0 516 | helix_stacking_CUAU 0 517 | helix_stacking_CUCG 0 518 | helix_stacking_CUCU 0 519 | helix_stacking_CUGG 0 520 | helix_stacking_CUGU 0 521 | helix_stacking_CUUC 0 522 | helix_stacking_CUUG 0 523 | helix_stacking_CUUU 0 524 | helix_stacking_GAAG 0 525 | helix_stacking_GAAU 0 526 | helix_stacking_GACG 0 527 | helix_stacking_GACU 0 528 | helix_stacking_GAGG 0 529 | helix_stacking_GAGU 0 530 | helix_stacking_GAUG 0 531 | helix_stacking_GAUU 0 532 | helix_stacking_GCAU 0.342121 533 | helix_stacking_GCCG 0.77176 534 | helix_stacking_GCCU 0 535 | helix_stacking_GCGG 0 536 | helix_stacking_GCGU 0.313625 537 | helix_stacking_GCUG 0.474024 538 | helix_stacking_GCUU 0 539 | helix_stacking_GGAU 0 540 | helix_stacking_GGCU 0 541 | helix_stacking_GGGG 0 542 | helix_stacking_GGGU 0 543 | helix_stacking_GGUG 0 544 | helix_stacking_GGUU 0 545 | helix_stacking_GUAU -0.0905706 546 | helix_stacking_GUCU 0 547 | helix_stacking_GUGU 0.175914 548 | helix_stacking_GUUG -0.265254 549 | helix_stacking_GUUU 0 550 | helix_stacking_UAAU 0.285857 551 | helix_stacking_UACU 0 552 | helix_stacking_UAGU -0.0092986 553 | helix_stacking_UAUU 0 554 | helix_stacking_UCCU 0 555 | helix_stacking_UCGU 0 556 | helix_stacking_UCUU 0 557 | helix_stacking_UGGU 0.605438 558 | helix_stacking_UGUU 0 559 | helix_stacking_UUUU 0 560 | helix_closing_AA 0 561 | helix_closing_AC 0 562 | helix_closing_AG 0 563 | helix_closing_AU -0.904257 564 | helix_closing_CA 0 565 | helix_closing_CC 0 566 | helix_closing_CG -0.447655 567 | helix_closing_CU 0 568 | 
helix_closing_GA 0 569 | helix_closing_GC -0.664996 570 | helix_closing_GG 0 571 | helix_closing_GU -0.551376 572 | helix_closing_UA -0.469223 573 | helix_closing_UC 0 574 | helix_closing_UG -0.690579 575 | helix_closing_UU 0 576 | multi_base 0.392109 577 | multi_unpaired -0.0305723 578 | multi_paired -0.324548 579 | dangle_left_AAA 0 580 | dangle_left_AAC 0 581 | dangle_left_AAG 0 582 | dangle_left_AAU 0 583 | dangle_left_ACA 0 584 | dangle_left_ACC 0 585 | dangle_left_ACG 0 586 | dangle_left_ACU 0 587 | dangle_left_AGA 0 588 | dangle_left_AGC 0 589 | dangle_left_AGG 0 590 | dangle_left_AGU 0 591 | dangle_left_AUA -0.0096949 592 | dangle_left_AUC 0.296587 593 | dangle_left_AUG 0.264354 594 | dangle_left_AUU 0.467729 595 | dangle_left_CAA 0 596 | dangle_left_CAC 0 597 | dangle_left_CAG 0 598 | dangle_left_CAU 0 599 | dangle_left_CCA 0 600 | dangle_left_CCC 0 601 | dangle_left_CCG 0 602 | dangle_left_CCU 0 603 | dangle_left_CGA 0.196253 604 | dangle_left_CGC 0.440535 605 | dangle_left_CGG 0.390397 606 | dangle_left_CGU 0.139024 607 | dangle_left_CUA 0 608 | dangle_left_CUC 0 609 | dangle_left_CUG 0 610 | dangle_left_CUU 0 611 | dangle_left_GAA 0 612 | dangle_left_GAC 0 613 | dangle_left_GAG 0 614 | dangle_left_GAU 0 615 | dangle_left_GCA -0.320284 616 | dangle_left_GCC -0.181196 617 | dangle_left_GCG 0.0390977 618 | dangle_left_GCU 0.175603 619 | dangle_left_GGA 0 620 | dangle_left_GGC 0 621 | dangle_left_GGG 0 622 | dangle_left_GGU 0 623 | dangle_left_GUA -0.0839476 624 | dangle_left_GUC 0.148304 625 | dangle_left_GUG 0.0216176 626 | dangle_left_GUU 0.053797 627 | dangle_left_UAA -0.0866879 628 | dangle_left_UAC -0.250894 629 | dangle_left_UAG -0.322181 630 | dangle_left_UAU -0.0654954 631 | dangle_left_UCA 0 632 | dangle_left_UCC 0 633 | dangle_left_UCG 0 634 | dangle_left_UCU 0 635 | dangle_left_UGA -0.168554 636 | dangle_left_UGC 0.117638 637 | dangle_left_UGG 0.304698 638 | dangle_left_UGU 0.0870223 639 | dangle_left_UUA 0 640 | dangle_left_UUC 0 641 | 
dangle_left_UUG 0 642 | dangle_left_UUU 0 643 | dangle_right_AAA 0 644 | dangle_right_AAC 0 645 | dangle_right_AAG 0 646 | dangle_right_AAU 0 647 | dangle_right_ACA 0 648 | dangle_right_ACC 0 649 | dangle_right_ACG 0 650 | dangle_right_ACU 0 651 | dangle_right_AGA 0 652 | dangle_right_AGC 0 653 | dangle_right_AGG 0 654 | dangle_right_AGU 0 655 | dangle_right_AUA -0.927456 656 | dangle_right_AUC -1.10559 657 | dangle_right_AUG -0.981522 658 | dangle_right_AUU -0.995162 659 | dangle_right_CAA 0 660 | dangle_right_CAC 0 661 | dangle_right_CAG 0 662 | dangle_right_CAU 0 663 | dangle_right_CCA 0 664 | dangle_right_CCC 0 665 | dangle_right_CCG 0 666 | dangle_right_CCU 0 667 | dangle_right_CGA -0.82867 668 | dangle_right_CGC -1.11699 669 | dangle_right_CGG -1.23095 670 | dangle_right_CGU -1.23702 671 | dangle_right_CUA 0 672 | dangle_right_CUC 0 673 | dangle_right_CUG 0 674 | dangle_right_CUU 0 675 | dangle_right_GAA 0 676 | dangle_right_GAC 0 677 | dangle_right_GAG 0 678 | dangle_right_GAU 0 679 | dangle_right_GCA -0.532095 680 | dangle_right_GCC -0.54946 681 | dangle_right_GCG -0.398636 682 | dangle_right_GCU -0.868356 683 | dangle_right_GGA 0 684 | dangle_right_GGC 0 685 | dangle_right_GGG 0 686 | dangle_right_GGU 0 687 | dangle_right_GUA -0.908315 688 | dangle_right_GUC -0.876077 689 | dangle_right_GUG -0.991237 690 | dangle_right_GUU -1.08336 691 | dangle_right_UAA -1.04753 692 | dangle_right_UAC -0.918508 693 | dangle_right_UAG -1.1966 694 | dangle_right_UAU -1.07818 695 | dangle_right_UCA 0 696 | dangle_right_UCC 0 697 | dangle_right_UCG 0 698 | dangle_right_UCU 0 699 | dangle_right_UGA -0.463016 700 | dangle_right_UGC -0.463076 701 | dangle_right_UGG -0.779374 702 | dangle_right_UGU -0.559652 703 | dangle_right_UUA 0 704 | dangle_right_UUC 0 705 | dangle_right_UUG 0 706 | dangle_right_UUU 0 707 | external_unpaired -0.144898 708 | external_paired -1.54974 709 | -------------------------------------------------------------------------------- /pyproject.toml: 
"""Predict maximum expected pseudoaccuracy (MEA) structures and score them.

Command-line script. For every base pair probability matrix it scans
log2(gamma), keeps the MEA structure whose *expected* metric is best
(Hamada et al. BMC Bioinf 2010 11:586), writes that structure to
``<output_dir>/<name>.dbn`` and, optionally, scores the written structures
against ground-truth structures.
"""
import argparse
import os
import sys
from glob import glob

import numpy as np

from arnie.mea.mea import MEA, load_matrix_or_dbn, score_ground_truth

# Order of the values returned by ``MEA.score_expected()``; also the accepted
# values of the ``metric`` argument.
_METRIC_NAMES = ['sen', 'ppv', 'mcc', 'fscore']


def _construct_id(path):
    """Return the file name of `path` up to (not including) its first dot."""
    return os.path.basename(path).split('.')[0]


def _match_true_structs(pred_path, true_struct_list):
    """Return the ground-truth paths that belong to the construct of `pred_path`.

    Exact construct-id matches are preferred so that e.g. ``a1`` is not paired
    with ``a10``. Only when there is no exact match do we fall back to the
    historical rule (construct id is a substring of the ground-truth path), to
    stay compatible with existing file layouts.
    """
    name = _construct_id(pred_path)
    exact = [s for s in true_struct_list if _construct_id(s) == name]
    if exact:
        return exact
    return [s for s in true_struct_list if name in s]


def predict_MEA_structures(matrix_list, gamma_min=-7, gamma_max=7, verbose=False, metric='mcc', output_dir='MEA_output'):
    '''Estimate maximum expected pseudoaccuracy structures per Hamada et al. BMC Bioinf 2010 11:586.

    For each matrix, an MEA structure is built at every log_2(gamma) value and
    the one with the highest expected `metric` is kept and written to
    `<output_dir>/<name>.dbn`, where `<name>` is the matrix file name up to its
    first dot. A summary table is always printed to stdout.

    Inputs:

    matrix_list: list of paths to NxN base pair probability matrices
        (read with `np.loadtxt`).
    gamma_min, gamma_max: range of log_2(gamma) values scanned, defaults are
        -7 and 7. The range is half-open: `gamma_max` itself is not evaluated.
    metric: which expected metric selects the structure. Options are 'sen',
        'ppv', 'mcc', 'fscore'.
    verbose: also print one line per construct (for command line use).
    output_dir: directory the `.dbn` files are written to; created if missing.

    Outputs:
    List with one predicted structure per input matrix (the `structure`
    attribute of the best-scoring MEA object), in input order.

    Raises:
    ValueError if `metric` is unknown, `matrix_list` is empty, or the gamma
    range is empty.

    '''
    if metric not in _METRIC_NAMES:
        raise ValueError("Unknown metric '%s'; options are %s." % (metric, ', '.join(_METRIC_NAMES)))
    metric_ind = _METRIC_NAMES.index(metric)

    if len(matrix_list) == 0:
        raise ValueError('No matrix files found!')

    gamma_vals = list(range(gamma_min, gamma_max))
    if not gamma_vals:
        raise ValueError('Empty gamma range: gamma_min (%d) must be smaller than gamma_max (%d).'
                         % (gamma_min, gamma_max))

    matrices = [np.loadtxt(x) for x in matrix_list]
    pdb_indices = [_construct_id(x) for x in matrix_list]

    best_metric_values, best_gammas, best_structs, best_metrics = [], [], [], []
    metrics_across_gammas = {g: [] for g in gamma_vals}

    if verbose:
        print('\nmetric\tpdb_ind\tbest_log2g\tbest_metric_value\tbest_struct')

    for name, matrix in zip(pdb_indices, matrices):

        # Start with "nothing selected" rather than a baseline of 0: a
        # construct whose metric never exceeds 0 must still get a real
        # structure and a full metrics row, otherwise the averages below
        # cannot be computed.
        running_best_metrics = None
        running_best_value = None
        running_best_gamma = None
        running_best_struct = None

        for g in gamma_vals:
            mea_cls = MEA(matrix, gamma=2**g)

            metrics = mea_cls.score_expected()  # sen, ppv, mcc, fscore
            metrics_across_gammas[g].append(metrics)

            value = metrics[metric_ind]
            # Strict '>' keeps the first gamma on ties; a NaN incumbent is
            # always replaced so that a leading NaN cannot block real values.
            if running_best_value is None or np.isnan(running_best_value) or value > running_best_value:
                running_best_value = value
                running_best_metrics = metrics
                running_best_gamma = g
                running_best_struct = mea_cls.structure

        best_metrics.append(running_best_metrics)
        best_metric_values.append(running_best_value)
        best_gammas.append(running_best_gamma)
        best_structs.append(running_best_struct)

        if verbose:
            print("%s\t%s\t%d\t%.3f\t%s" % (metric, name, running_best_gamma, running_best_value, running_best_struct))

    # Average metrics over all constructs at each fixed gamma.
    print('\t\tlog2(g)\tsen\tppv\tmcc\tfscore')
    for g in gamma_vals:
        sen, ppv, mcc, fscore = np.mean(metrics_across_gammas[g], axis=0)
        print('gamma_avg\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (g, sen, ppv, mcc, fscore))

    # Average metrics when every construct uses its own best gamma.
    sen, ppv, mcc, fscore = np.mean(np.array(best_metrics), axis=0)
    print('gamma_best\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % (np.mean(best_gammas), sen, ppv, mcc, fscore))

    os.makedirs(output_dir, exist_ok=True)

    for struct, ind in zip(best_structs, pdb_indices):
        out_path = os.path.join(output_dir, '%s.dbn' % ind)
        if os.path.exists(out_path):
            print('NB: overwriting existing predicted structure')
        with open(out_path, 'w') as f:
            f.write(struct)

    return best_structs


def score_against_true_structs(pred_struct_list, true_struct_list, verbose=False, weight_by_n_bps=False):
    '''Score maximum expected pseudoaccuracy structures against provided 3D structures.

    Note: predicted and true structure files are paired by file name (the part
    before the first dot), so suffixes don't matter. Predictions without a
    matching true structure are ignored.

    Inputs:

    pred_struct_list: list of paths to predicted structures.
    true_struct_list: list of paths to true structures; both kinds of file are
        read with `load_matrix_or_dbn`.
    verbose: accepted for interface compatibility; currently unused, the
        per-construct and average scores are always printed.
    weight_by_n_bps: if True, each construct's scores are weighted by the fifth
        value returned by `score_ground_truth` (presumably its number of base
        pairs -- confirm against arnie.mea); otherwise all constructs weigh
        the same.

    Outputs:

    Tuple (mean_sen, mean_ppv, mean_mcc, mean_fscore) over all matched pairs:
    SEN: TP/(TP+FN)
    PPV: TP/(TP+FP)
    MCC: Mathews correlation coefficient
    Fscore: 2*TP/(2*TP + FP + FN)

    Raises:
    ValueError if either list is empty, if no prediction can be paired with a
    true structure, or if the total weight is zero.

    '''
    if len(pred_struct_list) == 0:
        raise ValueError('No predicted structure files found!')

    if len(true_struct_list) == 0:
        raise ValueError('No ground truth structure files found!')

    # Keep the construct id together with each loaded pair so the printed
    # label always belongs to the structures actually being scored.
    pairs = []
    for x in pred_struct_list:
        for s in _match_true_structs(x, true_struct_list):
            pstruct = load_matrix_or_dbn(x)
            struct = load_matrix_or_dbn(s)
            pairs.append((_construct_id(x), pstruct, struct))

    if not pairs:
        raise ValueError('No predicted structure could be matched to a ground truth structure by file name!')

    tally, ptl_sen, ptl_ppv, ptl_mcc, ptl_fscore = 0, 0, 0, 0, 0

    for name, pstruct, struct in pairs:
        sen, ppv, mcc, fscore, N = score_ground_truth(pstruct, struct)
        print('Score:\t%s\t%.3f\t%.3f\t%.3f\t%.3f' % (name, sen, ppv, mcc, fscore))

        weight = N if weight_by_n_bps else 1
        ptl_sen += sen * weight
        ptl_ppv += ppv * weight
        ptl_mcc += mcc * weight
        ptl_fscore += fscore * weight
        tally += weight

    if tally == 0:
        raise ValueError('Total weight is zero; cannot average the scores.')

    mean_sen = ptl_sen / tally
    mean_ppv = ptl_ppv / tally
    mean_mcc = ptl_mcc / tally
    mean_fscore = ptl_fscore / tally

    print("Avg:\tsen\tppv\tmcc\tfscore\n\t%.3f\t%.3f\t%.3f\t%.3f" % (mean_sen, mean_ppv, mean_mcc, mean_fscore))

    return mean_sen, mean_ppv, mean_mcc, mean_fscore


def _build_parser():
    """Build the command-line argument parser of this script."""
    parser = argparse.ArgumentParser(
        description=(
            "Estimate maximum expected pseudoaccuracy structures per Hamada et al. "
            "BMC Bioinf 2010 11:586 and score against a ground truth dataset. "
            "Input format: Base pair probability matrices (specified in --bp_matrices) "
            "need to have same base names as structures (specified in --true_structs, "
            "and can be either dbn strings or NxN matrices), but the extensions for "
            "both types don't matter."))

    parser.add_argument('--bp_matrices', '-p', nargs='+',
                        help='path to NxN matrices of bp probabilities, i.e. `contrafold/*.bpps`.')

    parser.add_argument('--output_dir', '-o', default='MEA_output',
                        help="Path to output of predicted MEA structures. Default is `MEA_output`.")

    parser.add_argument('--true_structs', '-s', nargs='+', default=None,
                        help='Optional: path to true structures, i.e. `rnaview/*.struct`. '
                             'These can be dbn structures or NxN matrices.')

    parser.add_argument('--metric', default='mcc', choices=_METRIC_NAMES,
                        help='Accuracy metric, options are `mcc`, `fscore`, `ppv`, or `sen`. Default is `mcc`.')

    parser.add_argument('--gamma_min', type=int, default=-7, help='Min value for log_2(gamma), default is -7')
    parser.add_argument('--gamma_max', type=int, default=7, help='Max value for log_2(gamma), default is 7')

    parser.add_argument('--weight_by_n_bps', dest='weight_by_n_bps', action='store_true',
                        help='For scoring to true structures, weight accuracy over dataset by number of bps. '
                             'If flag not included, equal weight across constructs.')

    parser.add_argument('--verbose', dest='verbose', action='store_true')
    parser.add_argument('--score_truth_only', dest='score_truth_only', action='store_true',
                        help='Use if MEA structures already generated and only scoring to ground truth dataset.')
    return parser


def main(argv=None):
    """Run the command-line interface; `argv` defaults to `sys.argv[1:]`."""
    parser = _build_parser()
    if argv is None:
        argv = sys.argv[1:]

    # Print help and exit if no args.
    if len(argv) == 0:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args(argv)

    # Prediction needs input matrices; without this check a missing
    # --bp_matrices surfaced later as a TypeError on len(None).
    if not args.score_truth_only and not args.bp_matrices:
        parser.error('--bp_matrices is required unless --score_truth_only is given.')

    if args.verbose:
        print('\nRNA MEA STRUCTURE PREDICTION')
        if args.bp_matrices:
            print('Number of structures: %d' % len(args.bp_matrices))
            print('Path to first base pair matrix: %s' % args.bp_matrices[0])
        if args.true_structs:
            print('Path to first true struct: %s' % args.true_structs[0])
        print('\nScanning gamma for MEA structure prediction:')

    if not args.score_truth_only:
        predict_MEA_structures(args.bp_matrices, gamma_min=args.gamma_min, gamma_max=args.gamma_max,
                               verbose=args.verbose, metric=args.metric, output_dir=args.output_dir)

    if args.true_structs:
        if args.verbose:
            print('\nScoring provided true structures against maximum expected pseudoaccuracy structures:')
        # Score whatever structures are present in the output directory.
        score_against_true_structs(glob('%s/*' % args.output_dir), args.true_structs,
                                   verbose=args.verbose, weight_by_n_bps=args.weight_by_n_bps)


if __name__ == '__main__':
    main()
9 | """) 10 | 11 | p.add_argument("seq_dir", nargs='+', 12 | help="path to dir of *.seq files") 13 | p.add_argument("-o", help="name of output dir") 14 | p.add_argument("-p", "--package", default='vienna_2', 15 | help="Package to use") 16 | 17 | if len(sys.argv)==1: 18 | p.print_help(sys.stderr) 19 | sys.exit(1) 20 | 21 | args = p.parse_args() 22 | 23 | if not os.path.exists('./%s' % args.o): 24 | os.makedirs('./%s' % args.o) 25 | 26 | for seqfile in args.seq_dir: 27 | print(seqfile) 28 | seq=open(seqfile,'r').readlines()[-1].rstrip() 29 | seq_id = os.path.basename(seqfile).replace('.seq','') 30 | bp_matrix = bpps.bpps(seq, package=args.package) 31 | with open("%s/%s.bpps" % (args.o, seq_id),'w') as f: 32 | write_matrix_to_file(bp_matrix, f) 33 | -------------------------------------------------------------------------------- /scripts/write_unpaired_vectors.py: -------------------------------------------------------------------------------- 1 | import sys, os, argparse 2 | import arnie.bpps as bpps 3 | import numpy as np 4 | from arnie.utils import write_vector_to_file 5 | 6 | if __name__=='__main__': 7 | p = argparse.ArgumentParser(description= 8 | """Write unpaired posterior probabilities to files. 
9 | """) 10 | 11 | p.add_argument("seq_dir", nargs='+', 12 | help="path to dir of *.seq files") 13 | p.add_argument("-o", help="name of output dir") 14 | p.add_argument("-p", "--package", default='vienna_2', help="Package to use") 15 | 16 | if len(sys.argv)==1: 17 | p.print_help(sys.stderr) 18 | sys.exit(1) 19 | 20 | args = p.parse_args() 21 | 22 | if not os.path.exists('./%s' % args.o): 23 | os.makedirs('./%s' % args.o) 24 | 25 | for seqfile in args.seq_dir: 26 | print(seqfile) 27 | seq=open(seqfile,'r').readlines()[-1].rstrip() 28 | seq_id = os.path.basename(seqfile).replace('.seq','') 29 | 30 | unp_vector = 1-np.sum(bpps.bpps(seq, package=args.package),axis=0) 31 | 32 | with open("%s/%s.unp" % (args.o, seq_id),'w') as f: 33 | write_vector_to_file(unp_vector, f) 34 | -------------------------------------------------------------------------------- /src/arnie/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/arnie/bpps.py: -------------------------------------------------------------------------------- 1 | import os, re, sys 2 | import subprocess as sp 3 | import random, string 4 | import numpy as np 5 | from .utils import * 6 | from .pfunc import pfunc 7 | 8 | # load package locations from yaml file, watch! global dict 9 | package_locs = load_package_locations() 10 | 11 | def bpps(sequence, package='vienna', constraint=None, pseudo=False, 12 | T=37, coaxial=True, linear=False, dna=False, 13 | motif=None, dangles=True,param_file=None,reweight=None, beam_size=100, DEBUG=False, threshknot=False, 14 | probing_signal=None, probing_kws=None,DIRLOC=None): 15 | 16 | ''' Compute base pairing probability matrix for RNA sequence. 
17 | 18 | Args: 19 | sequence (str): nucleic acid sequence 20 | T (float): temperature (Celsius) 21 | linear (bool): call LinearPartition to estimate Z in Vienna or Contrafold 22 | constraint (str): structure constraint (functional in vienna, contrafold, rnastructure) 23 | motif (str): argument to vienna motif 24 | pseudo (bool): (NUPACK only) include pseudoknot calculation 25 | dangles (bool): dangles or not, specifiable for vienna, nupack 26 | dna (bool): (NUPACK only) use SantaLucia 1998 parameters for DNA 27 | coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold 28 | noncanonical(bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold)) 29 | beam size (int): Beam size for LinearPartition base pair calculation. 30 | DEBUG (bool): Output command-line calls to packages. 31 | threshknot (bool): calls threshknot to predict pseudoknots (for contrafold with LinearPartition) 32 | 33 | Possible packages: 'vienna_2', 'vienna_1','contrafold_1','contrafold_2', 34 | 'nupack_95','nupack_99','rnasoft_2007','rnasoft_1999','rnastructure','vfold_0','vfold_1' 35 | 36 | Returns 37 | array: NxN matrix of base pair probabilities 38 | ''' 39 | package = package.lower() 40 | try: 41 | pkg, version = package.split('_') 42 | except: 43 | pkg, version = package, None 44 | 45 | if motif is not None and pkg != 'vienna': 46 | raise ValueError('motif option can only be used with Vienna.') 47 | 48 | if pseudo and pkg != 'nupack': 49 | raise ValueError('pseudoknot option only implemented with Nupack.') 50 | 51 | if not dangles and pkg not in ['vienna','nupack']: 52 | print('Warning: %s does not support dangles options' % pkg) 53 | if not coaxial and pkg not in ['rnastructure','vfold']: 54 | print('Warning: %s does not support coaxial options' % pkg) 55 | if linear and pkg not in ['vienna','contrafold','eternafold']: 56 | print('Warning: LinearPartition only implemented for vienna, contrafold, eternafold.') 57 | 58 | if pkg=='nupack': 59 | 
return bpps_nupack_(sequence, version = version, dangles = dangles, T = T, pseudo=pseudo, dna=dna) 60 | 61 | elif pkg=='vfold': 62 | return bpps_vfold_(sequence, version = version, T = T, coaxial = coaxial) 63 | else: 64 | 65 | _, tmp_file = pfunc(sequence, package=package, bpps=True, linear=linear, 66 | motif=motif, constraint=constraint, T=T, coaxial=coaxial, probing_signal=probing_signal, probing_kws=probing_kws, DIRLOC=package_locs[package], 67 | dangles=dangles, param_file=param_file,reweight=reweight, beam_size=beam_size, DEBUG=DEBUG, threshknot=threshknot) 68 | 69 | if linear: 70 | #parse linearpartition output 71 | return bpps_linearpartition_(sequence, tmp_file) 72 | else: 73 | 74 | if 'contrafold' in pkg: 75 | return bpps_contrafold_(sequence, tmp_file) 76 | if package=='eternafold': 77 | return bpps_contrafold_(sequence, tmp_file) 78 | elif 'vienna' in pkg: 79 | return bpps_vienna_(sequence, tmp_file) 80 | elif 'rnasoft' in pkg: 81 | return bpps_rnasoft_(sequence, tmp_file) 82 | elif 'rnastructure' in pkg: 83 | return bpps_rnastructure_(sequence, tmp_file, coaxial=coaxial) 84 | 85 | else: 86 | raise RuntimeError('package not yet implemented') 87 | 88 | def bpps_vienna_(sequence, tmp_file): 89 | 90 | dot_fname = tmp_file 91 | 92 | probs=np.zeros([len(sequence), len(sequence)]) 93 | with open(dot_fname,'r') as f: 94 | for line in f.readlines(): 95 | if 'ubox' in line: 96 | try: 97 | i, j, p, _ = line.split() 98 | i, j, p = int(i)-1, int(j)-1, float(p)**2 99 | probs[i,j] = p 100 | probs[j,i] = p 101 | except: 102 | pass 103 | os.remove(dot_fname) 104 | return probs 105 | 106 | def bpps_contrafold_(sequence, tmp_file): 107 | 108 | fname = tmp_file 109 | 110 | probs=np.zeros([len(sequence), len(sequence)]) 111 | 112 | for line in open(fname).readlines(): 113 | if len(line.split(':')) > 1: 114 | first_ind = int(line.split()[0])-1 115 | for x in line.split()[2:]: 116 | second_ind = int(x.split(':')[0])-1 117 | p = float(x.split(':')[1]) 118 | probs[first_ind, 
second_ind] = p 119 | probs[second_ind, first_ind] = p 120 | 121 | os.remove(fname) 122 | 123 | return probs 124 | 125 | def bpps_rnasoft_(sequence, tmp_file): 126 | fname = tmp_file 127 | 128 | probs=np.zeros([len(sequence), len(sequence)]) 129 | for line in open(fname).readlines(): 130 | i,j,p = int(line.split()[0]), int(line.split()[1]), float(line.split()[2]) 131 | probs[i,j] = p 132 | probs[j,i] = p 133 | 134 | os.remove(fname) 135 | 136 | return probs 137 | 138 | def bpps_nupack_(sequence, version='95', T=37, dangles=True, pseudo=False,dna=False): 139 | 140 | if not version: version='95' 141 | 142 | nupack_materials={'95': 'rna1995', '99': 'rna1999'} 143 | 144 | if dna: 145 | material='dna1998' 146 | else: 147 | material=nupack_materials[version] 148 | 149 | DIR = package_locs['nupack'] 150 | 151 | if dangles: 152 | dangle_option='some' 153 | else: 154 | dangle_option='none' 155 | 156 | seqfile = write([sequence]) 157 | 158 | command=['%s/pairs' % DIR, '%s' % seqfile.replace('.in',''), 159 | '-T', str(T), '-material', material, '-dangles', dangle_option, '-cutoff', '0.0000000001'] 160 | 161 | if pseudo: 162 | command.append('--pseudo') 163 | p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE) 164 | 165 | stdout, stderr = p.communicate() 166 | 167 | if p.returncode: 168 | raise Exception('Nupack pfunc failed: on %s\n%s' % (sequence, stderr)) 169 | 170 | ppairs_file = '%s.ppairs' % seqfile.replace('.in','') 171 | os.remove(seqfile) 172 | 173 | probs=np.zeros([len(sequence), len(sequence)]) 174 | 175 | with open(ppairs_file, 'r') as f: 176 | for line in f.readlines(): 177 | if not line.startswith('%'): 178 | fields = line.split() 179 | if len(fields) > 1: 180 | if int(fields[1]) <= len(sequence): 181 | i, j, p = int(fields[0])-1, int(fields[1])-1, float(fields[2]) 182 | probs[i,j] = p 183 | probs[j,i] = p 184 | os.remove(ppairs_file) 185 | 186 | return probs 187 | 188 | def bpps_rnastructure_(sequence, tmp_file, coaxial=True, DEBUG=False): 189 | 190 | DIR = 
package_locs['rnastructure'] 191 | 192 | pfsfile = tmp_file #'%s/rnastructtmp.pfs' % package_locs['TMP'] 193 | outfile = '%s.probs' % (tmp_file.replace('.pfs','')) 194 | command = ['%s/ProbabilityPlot' % DIR, pfsfile, outfile, '-t', '-min', '0.0000000001'] 195 | 196 | probs=np.zeros([len(sequence), len(sequence)]) 197 | 198 | if DEBUG: print(' '.join(command)) 199 | p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE) 200 | 201 | stdout, stderr = p.communicate() 202 | 203 | if DEBUG: 204 | print('stdout') 205 | print(stdout) 206 | print('stderr') 207 | print(stderr) 208 | 209 | if p.returncode: 210 | raise Exception('RNAstructure ProbabilityPlot failed: on %s\n%s' % (seq, stderr)) 211 | 212 | with open(outfile, 'r') as f: 213 | for line in f.readlines()[2:]: 214 | fields = line.split() 215 | i, j, p = int(fields[0])-1, int(fields[1])-1, 10**(-1*float(fields[2])) 216 | probs[i,j] = p 217 | probs[j,i] = p 218 | 219 | os.remove(outfile) 220 | os.remove(pfsfile) 221 | return probs 222 | 223 | def bpps_vfold_(sequence, version='0',T=37, coaxial=True, DEBUG=False): 224 | #available versions: 0 for Turner 04 params, 1 for Mfold 2.3 params 225 | 226 | DIR = package_locs["vfold"] 227 | 228 | cwd = os.getcwd() 229 | os.chdir(DIR) #vfold precompiled binaries don't work being called from elsewhere 230 | 231 | if DEBUG: print(os.getcwd()) 232 | 233 | seqfile = write([sequence]) 234 | 235 | outfile = filename()+'.pij' 236 | 237 | if sys.platform=="linux": 238 | platform='linux' 239 | elif sys.platform=="darwin": 240 | platform='mac' 241 | elif sys.platform=="win32": 242 | platform='win' 243 | else: 244 | raise RuntimeError('Vfold has binaries for linux, macOS, and win') 245 | 246 | command = ['./Vfold2d_npk_%s.o %d %d %s %s %d' % (platform, int(coaxial), T, seqfile, outfile, int(version))] 247 | 248 | if DEBUG: print(' '.join(command)) 249 | 250 | p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, shell=True) 251 | 252 | stdout, stderr = p.communicate() 253 | os.chdir(cwd) 
254 | 255 | if DEBUG: 256 | print('stdout') 257 | print(stdout) 258 | print('stderr') 259 | print(stderr) 260 | if p.returncode: 261 | raise Exception('Vfold2d_npk failed: on %s\n%s' % (sequence, stderr)) 262 | 263 | os.remove(seqfile) 264 | probs = np.zeros([len(sequence),len(sequence)]) 265 | p_ij_output = np.loadtxt(outfile,usecols=(0,2,3)) #col 0: set of inds 1, col 1: set of inds 2, col 2: bpp 266 | 267 | for i,j,p in p_ij_output: 268 | probs[int(i-1),int(j-1)] = p 269 | probs[int(j-1),int(i-1)] = p 270 | os.remove(outfile) 271 | 272 | return probs 273 | #output: take second field of last line for Z 274 | 275 | 276 | def bpps_linearpartition_(sequence, tmp_file): 277 | 278 | fname = tmp_file 279 | 280 | probs=np.zeros([len(sequence), len(sequence)]) 281 | 282 | for line in open(fname,'r').readlines(): 283 | if len(line.strip())>0: 284 | first_ind, second_ind, p = line.strip().split(' ') 285 | first_ind = int(first_ind)-1 286 | second_ind = int(second_ind)-1 287 | p = float(p) 288 | probs[first_ind, second_ind] = p 289 | probs[second_ind, first_ind] = p 290 | 291 | os.remove(fname) 292 | 293 | return probs 294 | -------------------------------------------------------------------------------- /src/arnie/free_energy.py: -------------------------------------------------------------------------------- 1 | import os, re, sys 2 | import subprocess as sp 3 | import random, string 4 | import numpy as np 5 | from .utils import * 6 | from .pfunc import pfunc 7 | 8 | DEBUG=False 9 | 10 | # load package locations from yaml file, watch! global dict 11 | package_locs = load_package_locations() 12 | 13 | def free_energy(seq, constraint=None, package='vienna_2', T=37, coaxial=True, dna=False, beam_size=100, 14 | pseudo=False, dangles=True, reweight=None, ensemble=True, param_file=None, linear=False,DEBUG=False): 15 | ''' Compute free energy of RNA sequence. If structure is given, computes free energy of that structure. 
16 | Otherwise, returns MFE structure of sequence [NOT IMPLEMENTED YET]. 17 | 18 | Args: 19 | seq (str): nucleic acid sequence 20 | constraint (str, optional): possible structure to constrain to in dot bracket notation 21 | T (float): temperature (Celsius), default 37 22 | 23 | ensemble (bool): to compute ensemble of constraint string or not. 24 | Just converts '.' to 'x' in string. 25 | If you want the free energy of just one structure, 26 | better practice is to use 'x' to denote unpaired. 27 | 28 | 29 | motif (str): argument to vienna motif 30 | beam_size (int): beam size for use in LinearPartition (Vienna, CONTRAfold, EternaFold only) 31 | dangles (bool): dangles or not, specifiable for vienna, nupack 32 | dna (bool): use SantaLucia model for DNA (NUPACK only) 33 | coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold 34 | noncanonical(bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold)) 35 | pseudo (bool): include pseudoknot (nupack only) 36 | Implemented packages: 37 | 'vienna_1', 'vienna_2', 'contrafold' 38 | 39 | NB: doesn't multiply by kT for contrafold... 
40 | 41 | Returns 42 | free energy (float) 43 | ''' 44 | if not ensemble: 45 | constraint = constraint.replace('.','x') 46 | 47 | return pfunc(seq, package=package, T=T, dangles=dangles, coaxial=coaxial, pseudo=pseudo, dna=dna, beam_size = beam_size, 48 | constraint=constraint, reweight=reweight, param_file=param_file, return_free_energy=True, linear=linear, DEBUG=DEBUG) 49 | 50 | # if package.lower().startswith('contrafold'): 51 | # Z_constrained = pfunc(seq, package=package, T=T, dangles=dangles, constraint=constraint,param_file=param_file) 52 | 53 | # return -1* np.log(Z_constrained) # .00198 is k in kcal/mol #0.0019899*(273+T) * 54 | # else: 55 | # raise RuntimeError("%s `free_energy` not implemented yet" % package) 56 | -------------------------------------------------------------------------------- /src/arnie/mea/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/src/arnie/mea/__init__.py -------------------------------------------------------------------------------- /src/arnie/mea/mea.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse, sys 3 | from arnie.mea.mea_utils import * 4 | from copy import copy 5 | 6 | class MEA: 7 | def __init__(self, bpps, gamma = 1.0, debug=False, run_probknot_heuristic = False, theta=0, stochastic=False): 8 | self.debug = debug 9 | self.bpps = bpps 10 | self.N=self.bpps.shape[0] 11 | self.gamma = gamma 12 | self.theta = theta 13 | self.W = np.zeros([self.N,self.N]) 14 | self.MEA_bp_list = [] 15 | self.structure = ['.']*self.N 16 | self.MEA_bp_matrix = np.zeros([self.N, self.N]) 17 | self.tb = np.zeros([self.N, self.N]) 18 | self.min_hp_length = 3 19 | self.evaluated = False 20 | self.stochastic = stochastic 21 | 22 | if run_probknot_heuristic: 23 | self.run_ProbKnot() 24 | else: 25 | self.run_MEA() 26 | 27 | def fill_W(self, 
i, j): 28 | if self.stochastic: 29 | options = [self.W[i+1, j], self.W[i, j-1],\ 30 | (self.gamma+1)*self.bpps[i,j] + self.W[i+1, j-1] - 1,\ 31 | np.max([self.W[i,k] + self.W[k+1, j] for k in range(i+1,j)])] 32 | option_wts = options - np.min(options) 33 | option_wts /= np.sum(option_wts) 34 | selection = np.random.choice([0,1,2,3],p=option_wts) 35 | self.W[i,j] = options[selection] 36 | self.tb[i,j] = selection #0: 5' pass, 1: 3' pass, 2: bp, 3: multiloop 37 | 38 | else: 39 | options = [self.W[i+1, j], self.W[i, j-1],\ 40 | (self.gamma+1)*self.bpps[i,j] + self.W[i+1, j-1] - 1,\ 41 | np.max([self.W[i,k] + self.W[k+1, j] for k in range(i+1,j)])] 42 | self.W[i,j] = np.max(options) 43 | self.tb[i,j] = np.argmax(options) #0: 5' pass, 1: 3' pass, 2: bp, 3: multiloop 44 | 45 | def run_MEA(self): 46 | # fill weight matrix 47 | for length in range(self.min_hp_length, self.N): 48 | for i in range(self.N-length): 49 | j = i + length 50 | self.fill_W(i,j) 51 | 52 | self.traceback(0,self.N-1) 53 | 54 | for x in self.MEA_bp_list: 55 | self.MEA_bp_matrix[x[0],x[1]]=1 56 | self.structure[x[0]]='(' 57 | self.structure[x[1]]=')' 58 | 59 | self.structure = ''.join(self.structure) 60 | if not self.evaluated: self.evaluated = True 61 | 62 | def run_ProbKnot(self): 63 | 64 | #Threshknot step: filter out bps below cutoff theta 65 | threshknot_filter = np.where(self.bpps <= self.theta) 66 | filtered_bpps = copy(self.bpps) 67 | filtered_bpps[threshknot_filter] = 0 68 | 69 | output = np.zeros([self.N, self.N]) 70 | 71 | # ProbKnot heuristic part 1: get all base pairs where p(ij) == p_max(i) 72 | output[np.where(self.bpps == np.max(self.bpps, axis=0))] = 1 73 | 74 | # ProbKnot heuristic part 2: get all base pairs where p(ij) == p_max(j) 75 | self.MEA_bp_matrix = np.clip(output+np.transpose(output)-1,0,1) 76 | 77 | for [i, j] in np.array(np.where(self.MEA_bp_matrix == 1)).T: 78 | if np.abs(i - j) > 1: 79 | if [j,i] not in self.MEA_bp_list: 80 | self.MEA_bp_list.append([i,j]) 81 | 
#self.structure[i] = '(' 82 | #self.structure[j] = ')' 83 | #print('Warning: formatting pseudoknotted dot-bracket structures not yet supported. Any pseudoknotted stems will only appear as parentheses (not brackets).') 84 | #self.structure = ''.join(self.structure) 85 | self.structure = convert_bp_list_to_dotbracket(self.MEA_bp_list,len(self.bpps)) 86 | 87 | if not self.evaluated: self.evaluated = True 88 | 89 | def traceback(self, i, j): 90 | if j <= i: 91 | return 92 | elif self.tb[i,j] == 0: #5' neighbor 93 | if self.debug: print(i,j, "5'") 94 | self.traceback(i+1,j) 95 | elif self.tb[i,j] == 1: #3' neighbor 96 | if self.debug: print(i,j, "3'") 97 | self.traceback(i,j-1) 98 | elif self.tb[i,j] == 2: # base pair 99 | if self.debug: print(i,j,'bp') 100 | self.MEA_bp_list.append((i,j)) 101 | self.traceback(i+1,j-1) 102 | else: #multiloop 103 | for k in range(i+1,j): 104 | if self.W[i,j] == self.W[i, k] + self.W[k+1,j]: 105 | if self.debug: print(i,j,"multiloop, k=",k) 106 | self.traceback(i,k) 107 | self.traceback(k+1,j) 108 | break 109 | 110 | def score_expected(self): 111 | '''Compute expected values of TP, FP, etc from predicted MEA structure. 
112 | 113 | Returns: 114 | pseudoexpected SEN, PPV, MCC, F-score''' 115 | 116 | if not self.evaluated: 117 | if run_probknot_heuristic: 118 | self.run_ProbKnot() 119 | else: 120 | self.run_MEA() 121 | 122 | pred_m = self.MEA_bp_matrix[np.triu_indices(self.N)] 123 | probs = self.bpps[np.triu_indices(self.N)] 124 | 125 | TP = np.sum(np.multiply(pred_m, probs)) + 1e-6 126 | TN = 0.5*self.N*self.N-1 - np.sum(pred_m) - np.sum(probs) + TP + 1e-6 127 | FP = np.sum(np.multiply(pred_m, 1-probs)) + 1e-6 128 | FN = np.sum(np.multiply(1-pred_m, probs)) + 1e-6 129 | 130 | a,b = np.triu_indices(self.N) 131 | cFP = 1e-6 132 | # for i in range(len(pred_m)): 133 | # if np.sum(self.MEA_bp_matrix,axis=0)[a[i]] + np.sum(self.MEA_bp_matrix,axis=0)[b[i]]==0: 134 | # cFP += np.multiply(pred_m[i], 1-probs[i]) 135 | 136 | sen = TP/(TP + FN) 137 | ppv = TP/(TP + FP - cFP) 138 | mcc = (TP*TN - (FP - cFP)*FN)/np.sqrt((TP + FP - cFP)*(TP + FN)*(TN + FP - cFP)*(TN + FN)) 139 | fscore = 2*TP/(2*TP + FP - cFP + FN) 140 | 141 | return [sen, ppv, mcc, fscore] 142 | 143 | def score_ground_truth(self, ground_truth_struct, allow_pseudoknots=False): 144 | if len(ground_truth_struct[0])==1: 145 | gt_matrix = convert_dotbracket_to_matrix(ground_truth_struct) 146 | else: 147 | gt_matrix = ground_truth_struct 148 | 149 | if not self.evaluated: self.run_MEA() 150 | sen, ppv, mcc, fscore, _ = score_ground_truth(self.MEA_bp_matrix, gt_matrix) 151 | return [sen, ppv, mcc, fscore] 152 | -------------------------------------------------------------------------------- /src/arnie/mea/mea_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse, sys 3 | 4 | def convert_dotbracket_to_matrix(s): 5 | m = np.zeros([len(s),len(s)]) 6 | for char_set in [['(',')'], ['[',']'],['{','}'],['<','>']]: 7 | bp1=[] 8 | bp2=[] 9 | for i, char in enumerate(s): 10 | if char==char_set[0]: 11 | bp1.append(i) 12 | if char==char_set[1]: 13 | bp2.append(i) 14 | 
for i in list(reversed(bp1)): 15 | for j in bp2: 16 | if j > i: 17 | m[i,j]=1.0 18 | bp2.remove(j) 19 | break 20 | return m 21 | 22 | 23 | 24 | def convert_matrix_to_dotbracket(m): 25 | bp_list = convert_matrix_to_bp_list(m) 26 | return convert_bp_list_to_dotbracket(bp_list,len(m)) 27 | 28 | def convert_matrix_to_bp_list(m): 29 | bp_list = [] # convert adjacency matrix to adjacency list 30 | for i,row in enumerate(m): 31 | for j,is_bp in enumerate(row[i+1:]): 32 | if is_bp: 33 | bp_list.append([i,i+1+j]) 34 | return bp_list 35 | 36 | 37 | def convert_bp_list_to_dotbracket(bp_list,seq_len): 38 | dotbracket = "."*seq_len 39 | # group into bps that are not intertwined and can use same brackets! 40 | groups = group_into_non_conflicting_bp_(bp_list) 41 | 42 | # all bp that are not intertwined get (), but all others are 43 | # groups to be nonconflicting and then asigned (), [], {}, <> by group 44 | chars_set = [("(",")"),("(",")"),("[","]"),("{","}"),("<",">")] 45 | if len(groups) > len(chars_set): 46 | print("WARNING: PK too complex, not enough brackets to represent it.") 47 | 48 | for group,chars in zip(groups,chars_set): 49 | for bp in group: 50 | dotbracket = dotbracket[:bp[0]] + chars[0] + dotbracket[bp[0]+1:bp[1]] + chars[1] + dotbracket[bp[1]+1:] 51 | return dotbracket 52 | 53 | 54 | def load_matrix_or_dbn(s): 55 | num_lines = sum(1 for line in open(s)) 56 | 57 | if num_lines > 2: #heuristic here 58 | struct = np.loadtxt(s) # load as base pair matrix 59 | assert struct.shape[0] == struct.shape[1] 60 | else: 61 | try: # load as dot-bracket string 62 | 63 | dbn_struct = open(s,'r').read().rstrip() 64 | 65 | struct = convert_dotbracket_to_matrix(dbn_struct) 66 | except: 67 | raise ValueError('Unable to parse structure %s' % s) 68 | return struct 69 | 70 | def score_ground_truth(pred_matrix, true_matrix): 71 | '''Score a predicted structure against a true structure, 72 | input as NxN base pair matrix (takes top triangle).''' 73 | 74 | N = pred_matrix.shape[0] 75 | 
#print('pred',pred_matrix.shape, 'true', true_matrix.shape) 76 | assert pred_matrix.shape[1] == N 77 | assert true_matrix.shape[0] == N 78 | assert true_matrix.shape[1] == N 79 | 80 | true = true_matrix[np.triu_indices(N)] 81 | pred = pred_matrix[np.triu_indices(N)] 82 | 83 | TP, FP, cFP, TN, FN = 0, 0, 0, 0, 0 84 | 85 | for i in range(len(true)): 86 | if true[i] == 1: 87 | if pred[i] == 1: 88 | TP += 1 89 | else: 90 | FN += 1 91 | elif true[i] == 0: 92 | if pred[i] == 0: 93 | TN += 1 94 | else: 95 | FP += 1 96 | #check for compatible false positive 97 | a,b = np.triu_indices(N) 98 | if np.sum(true_matrix,axis=0)[a[i]]+ np.sum(true_matrix,axis=0)[b[i]]==0: 99 | cFP +=1 100 | 101 | # cFP = 0 #for debugging 102 | 103 | #print('TP', TP, 'TN', TN, 'FP', FP, 'FN', FN, 'cFP', cFP) 104 | 105 | if TP + FN == 0: 106 | sen = 1 107 | else: 108 | sen = TP/(TP + FN) 109 | 110 | if TP + FP - cFP == 0: 111 | ppv = 1 112 | else: 113 | ppv = TP/(TP + FP - cFP) 114 | 115 | mcc_num = (TP*TN - (FP - cFP)*FN) 116 | mcc_denom = np.sqrt((TP + FP - cFP)*(TP + FN)*(TN + FP - cFP)*(TN + FN)) 117 | 118 | if mcc_denom == 0: 119 | mcc = mcc_num 120 | else: 121 | mcc = mcc_num/mcc_denom 122 | 123 | if ppv + sen == 0: 124 | fscore = 0 125 | else: 126 | fscore = 2*ppv*sen/(ppv+sen) 127 | 128 | return sen, ppv, mcc, fscore, N 129 | 130 | 131 | def group_into_non_conflicting_bp_(bp_list): 132 | ''' given a conflict list from get_list_bp_conflicts_, group basepairs into groups that do not conflict 133 | 134 | Args 135 | conflict_list: list of pairs of base_pairs that are intertwined basepairs 136 | 137 | Returns: 138 | groups of baspairs that are not intertwined 139 | ''' 140 | conflict_list = get_list_bp_conflicts_(bp_list) 141 | 142 | non_redudant_bp_list = get_non_redudant_bp_list_(conflict_list) 143 | bp_with_no_conflict = [bp for bp in bp_list if bp not in non_redudant_bp_list] 144 | groups = [bp_with_no_conflict] 145 | while non_redudant_bp_list != []: 146 | current_bp = 
non_redudant_bp_list[0] 147 | current_bp_conflicts = [] 148 | for conflict in conflict_list: 149 | if current_bp == conflict[0]: 150 | current_bp_conflicts.append(conflict[1]) 151 | elif current_bp == conflict[1]: 152 | current_bp_conflicts.append(conflict[0]) 153 | group = [bp for bp in non_redudant_bp_list if bp not in current_bp_conflicts] 154 | groups.append(group) 155 | non_redudant_bp_list = current_bp_conflicts 156 | conflict_list = [conflict for conflict in conflict_list if conflict[0] not in group and conflict[1] not in group] 157 | return groups 158 | 159 | 160 | def get_list_bp_conflicts_(bp_list): 161 | '''given a bp_list gives the list of conflicts bp-s which indicate PK structure 162 | Args: 163 | bp_list: of list of base pairs where the base pairs are list of indeces of the bp in increasing order (bp[0]sequence', seq]) 241 | ct_fname = '%s.ct' % filename() 242 | 243 | command = [] 244 | if not pseudo: 245 | command = command + ['%s/Fold' % LOC, seq_file, ct_fname, '-T', str(T + 273.15)] 246 | else: 247 | command = command + ['%s/ShapeKnots' % LOC, seq_file, ct_fname] 248 | # if dms_signal is not None: 249 | # raise ValueError('Cannot run RNAstructure with DMS signal and pseudoknots.') 250 | if constraint is not None: 251 | raise ValueError('Cannot run RNAstructure with constraints and pseudoknots.') 252 | 253 | con_fname = None 254 | dms_fname = None 255 | shape_fname = None 256 | 257 | if constraint is not None: 258 | con_fname = '%s.CON' % filename() 259 | convert_dbn_to_RNAstructure_input(seq, constraint, con_fname) 260 | command.extend(['--constraint', con_fname]) 261 | 262 | if dms_signal is not None: 263 | if len(dms_signal) != len(seq): 264 | raise RuntimeError('DMS signal used with RNAstructure must have same length as the sequence.') 265 | dms_fname = write_reactivity_file_RNAstructure(dms_signal) 266 | command.extend(['--DMS', dms_fname]) 267 | 268 | if dms_file is not None: 269 | command.extend(['--DMS', dms_file]) 270 | 271 | if 
shape_signal is not None: 272 | if len(shape_signal) != len(seq): 273 | raise RuntimeError('SHAPE signal used with RNAstructure must have same length as the sequence.') 274 | shape_fname = write_reactivity_file_RNAstructure(shape_signal) 275 | command.extend(['--SHAPE', shape_fname]) 276 | 277 | if shape_file is not None: 278 | command.extend(['--SHAPE', shape_file]) 279 | 280 | if DEBUG: print(' '.join(command)) 281 | 282 | p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE) 283 | 284 | stdout, stderr = p.communicate() 285 | 286 | if DEBUG: 287 | print('stdout') 288 | print(stdout) 289 | print('stderr') 290 | print(stderr) 291 | if p.returncode: 292 | raise Exception('RNAstructure failed: on %s\n%s' % (seq, stderr)) 293 | 294 | if con_fname is not None: 295 | os.remove(con_fname) 296 | if dms_fname is not None: 297 | os.remove(dms_fname) 298 | if shape_fname is not None: 299 | os.remove(shape_fname) 300 | if seq_file is not None: 301 | os.remove(seq_file) 302 | 303 | dot_fname = '%s.dbn' % filename() 304 | command = ['%s/ct2dot' % LOC, ct_fname, "1", dot_fname] 305 | 306 | if DEBUG: print(' '.join(command)) 307 | 308 | p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE) 309 | 310 | stdout, stderr = p.communicate() 311 | 312 | if DEBUG: 313 | print('stdout') 314 | print(stdout) 315 | print('stderr') 316 | print(stderr) 317 | if p.returncode: 318 | raise Exception('RNAstructure ct2dot failed: on %s\n%s' % (seq, stderr)) 319 | 320 | f = open(dot_fname) 321 | dot_lines = f.readlines() 322 | f.close() 323 | 324 | mfe_struct = dot_lines[-1].strip('\n') 325 | 326 | os.remove(ct_fname) 327 | os.remove(dot_fname) 328 | 329 | return mfe_struct 330 | 331 | def mfe_contrafold_(seq, T=37, version='2', constraint=None, param_file=None,DIRLOC=None, 332 | viterbi=False, probing_signal=None, probing_kws=None): 333 | """get MFE structure for Contrafold 334 | 335 | Args: 336 | seq (str): nucleic acid sequence 337 | T (float): temperature 338 | constraint (str): structure 
constraints 339 | motif (str): argument to vienna motif 340 | Returns 341 | secondary structure dot-bracket string for MFE 342 | """ 343 | if not version: version='2' 344 | 345 | if probing_signal is not None: 346 | fname = write_reactivity_file_contrafold(probing_signal, seq) 347 | else: 348 | fname = '%s.in' % filename() 349 | 350 | if DIRLOC is not None: 351 | LOC=DIRLOC 352 | elif version.startswith('2'): 353 | LOC=package_locs['contrafold_2'] 354 | elif version.startswith('1'): 355 | LOC=package_locs['contrafold_1'] 356 | else: 357 | raise RuntimeError('Error, Contrafold version %s not present' % version) 358 | 359 | command = ['%s/contrafold' % LOC, 'predict', fname] 360 | 361 | if probing_signal is not None: 362 | command = command + ['--evidence', '--params', package_locs['eternafold']+'/../parameters/EternaFoldParams_PLUS_POTENTIALS.v1', '--numdatasources','1', ] 363 | if probing_kws is not None: 364 | if 'kappa' in probing_kws.keys(): 365 | command = command + ['--kappa', str(probing_kws['kappa']) ] 366 | else: 367 | if param_file is not None: 368 | command = command + ['--params', param_file] 369 | 370 | if viterbi: 371 | command.append('--viterbi') 372 | 373 | if constraint is not None: 374 | convert_dbn_to_contrafold_input(seq, constraint, fname) 375 | command.append('--constraints') 376 | else: 377 | if probing_signal is None: 378 | convert_dbn_to_contrafold_input(seq, ''.join(['.' 
for x in range(len(seq))]), fname) 379 | 380 | if DEBUG: print(' '.join(command)) 381 | 382 | p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE) 383 | 384 | stdout, stderr = p.communicate() 385 | 386 | if DEBUG: 387 | print('stdout') 388 | print(stdout) 389 | print('stderr') 390 | print(stderr) 391 | if p.returncode: 392 | raise Exception('Contrafold failed: on %s\n%s' % (seq, stderr)) 393 | 394 | os.remove(fname) 395 | 396 | return stdout.decode('utf-8').split('\n')[-2] 397 | 398 | def mfe_linearfold_(seq, bpps=False, package='contrafold', beam_size=100, return_dG_MFE=False): 399 | 400 | seqfile = write([seq]) 401 | 402 | LOC = package_locs['linearfold'] 403 | 404 | if bpps: 405 | 406 | pf_only = 0 407 | else: 408 | pf_only = 1 409 | 410 | # args: beamsize, is_sharpturn, is_verbose, is_eval, is_constraints] 411 | #Todo: implement constraint input 412 | command=['echo %s | %s/linearfold_%s' % (seq, LOC, package[0]), str(beam_size), '0', '0', '0'] 413 | if DEBUG: print(' '.join(command)) 414 | p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, shell=True) 415 | 416 | stdout, stderr = p.communicate() 417 | 418 | if DEBUG: 419 | print('stdout') 420 | print(stdout) 421 | print('stderr') 422 | print(stderr) 423 | 424 | if p.returncode: 425 | raise Exception('LinearFold failed: on %s\n%s' % (seq, stderr)) 426 | 427 | 428 | # linearfold returns two different things depending on which package 429 | struct = stdout.decode('utf-8').split('\n')[1].split(' ')[0] 430 | 431 | os.remove(seqfile) 432 | 433 | if return_dG_MFE: 434 | 435 | dG_mfe = float(stdout.decode('utf-8').split('\n')[1].split(' ')[1][1:-1]) 436 | 437 | if package.lower() != 'vienna': 438 | dG_mfe *= -1 439 | 440 | return struct, dG_mfe 441 | 442 | else: 443 | return struct 444 | 445 | 446 | -------------------------------------------------------------------------------- /src/arnie/mfe_bootstrap.py: -------------------------------------------------------------------------------- 1 | import random 2 | 
def get_bootstrap_reac_file(reactivity):
    """Write a bootstrap-resampled reactivity file and return its path.

    Positions (1-based) are resampled with replacement; only sampled entries
    with a strictly positive reactivity are written, one ``<pos> <value>`` per
    line. The caller is responsible for deleting the returned file.

    Args:
        reactivity (list): per-nucleotide reactivities.

    Returns:
        str: path of the written ``.SHAPE`` file.
    """
    reac_file = '%s.SHAPE' % filename()
    n_pos = len(reactivity)
    positions = np.arange(1, n_pos + 1)
    values = np.array(reactivity)
    sample_idx = np.random.choice(n_pos, n_pos)

    # context manager guarantees the handle is closed even if a write fails
    with open(reac_file, 'w') as f:
        for pos, value in zip(positions[sample_idx], values[sample_idx]):
            if value > 0:
                f.write('%d %f\n' % (pos, value))

    return reac_file


def mfe_bootstrap(seq, num_bootstrap,
                  package='rnastructure', T=37,
                  constraint=None, shape_signal=None, dms_signal=None, pseudo=False):
    """
    Compute MFE structure (within package) for RNA sequence with bootstrapping on the SHAPE/DMS data.

    Args:
        seq (str): nucleic acid sequence
        num_bootstrap (int): number of bootstrap replicates (must be >= 1)
        T (float): temperature (Celsius)
        constraint (str): structure constraints
        shape_signal(list): list of normalized SHAPE reactivities, with negative values indicating no signal
        dms_signal(list): list of normalized DMS reactivities, with negative values indicating no signal
        pseudo: if True, will predict pseudoknots, but only with RNAstructure

    Possible packages:
        'rnastructure'

    Returns
        list: [MFE structure (str) from the full data,
               base-pair matrix (np array) averaged over the bootstrap replicates]

    Raises:
        ValueError: if no reactivity data is given, the package is not
            'rnastructure', or num_bootstrap is smaller than 1.
    """
    if (shape_signal is None) and (dms_signal is None):
        raise ValueError("Bootstrapping only applies if you have reactivity data.")
    if package != 'rnastructure':
        raise ValueError("Bootstrapping only runs for now with RNAstructure")
    if num_bootstrap < 1:
        # would otherwise end in a 0/0 division and an all-NaN matrix
        raise ValueError("num_bootstrap must be at least 1")

    bpp_matrix = np.zeros((len(seq), len(seq)))

    mfe_struct = mfe(seq, package=package, T=T, constraint=constraint,
                     shape_signal=shape_signal, dms_signal=dms_signal, pseudo=pseudo)

    for _ in range(num_bootstrap):
        shape_file = None
        dms_file = None
        try:
            if shape_signal is not None:
                shape_file = get_bootstrap_reac_file(shape_signal)
            if dms_signal is not None:
                dms_file = get_bootstrap_reac_file(dms_signal)

            cur_mfe_struct = mfe(seq, package=package, T=T, constraint=constraint,
                                 shape_file=shape_file, dms_file=dms_file, pseudo=pseudo)
            bpp_matrix += get_bpp_from_dbn(cur_mfe_struct)
        finally:
            # always drop the resampled files, also when the prediction fails
            for tmp_file in (shape_file, dms_file):
                if tmp_file is not None:
                    try:
                        remove(tmp_file)
                    except FileNotFoundError:
                        pass

    return [mfe_struct, bpp_matrix/num_bootstrap]
def _remove_folder(folder):
    """Best-effort recursive removal of a scratch folder created by a predictor."""
    import shutil
    shutil.rmtree(folder, ignore_errors=True)


def pk_predict(seq, predictor,
               model="default", param="parameters_DP03.txt",
               refinement=1, t1="auto", t2='auto',
               cpu=32):
    '''
    Predict a (possibly pseudoknotted) structure for seq with an external predictor.

    ipknot options:
        model: one of ["LinearPartition-C","LinearPartition-V","Boltzmann","ViennaRNA","CONTRAfold","NUPACK"]
        t1: probability threshold level 1
        t2: probability threshold level 2
        refinement: number of times for refinement

    hotknots options:
        model: one of ["CC","RE","DP"]
        param: one of ["parameters_CC06.txt","parameters_CC09.txt","parameters_DP03.txt","parameters_DP09.txt"]

    spotrna options:
        cpu: number cpu threads

    e2efold, nupack, knotty, pknots, spotrna2: no options exposed.

    Returns:
        str: dot-bracket structure; wrappers that report failure by printing
        return a string of "x" of the sequence length instead.

    Raises:
        ValueError: for an unknown predictor, hotknots model/param or ipknot model.
    '''
    if predictor not in ["hotknots", "ipknot", "knotty", "spotrna", "e2efold", "pknots", "spotrna2", "nupack"]:
        raise ValueError('Only hotknots,ipknot,knotty,spotrna,spotrna2,e2efold,pknots,nupack implemented.')
    if predictor == "spotrna":
        return _run_spotrna(seq, cpu=cpu)[0]
    elif predictor == "spotrna2":
        return _run_spotrna2(seq)[0]
    elif predictor == "e2efold":
        return _e2efold(seq)
    elif predictor == "pknots":
        return _pknots(seq)
    elif predictor == "knotty":
        return _knotty_mfe(seq)
    elif predictor == "hotknots":
        if model == "default":
            model = "DP"
        if model not in ["CC", "RE", "DP"]:
            raise ValueError('Only CC, RE, DP model implemented for hotknots.')
        if param not in ["parameters_CC06.txt", "parameters_CC09.txt", "parameters_DP03.txt", "parameters_DP09.txt"]:
            raise ValueError('Only parameters_CC06.txt, parameters_CC09.txt, parameters_DP03.txt, parameters_DP09.txt parameters implemented for hotknots.')
        return _run_hotknots(seq, model=model, param=param)[0][0]
    elif predictor == "ipknot":
        if model == "default":
            model = "LinearPartition-C"
        if model not in ["LinearPartition-C", "LinearPartition-V", "Boltzmann", "ViennaRNA", "CONTRAfold", "NUPACK"]:
            raise ValueError('Only LinearPartition-C, LinearPartition-V, Boltzmann, ViennaRNA, CONTRAfold, NUPACK model implemented for ipknot.')
        return _ipknot_mfe(seq, model=model, refinement=refinement, t1=t1, t2=t2)
    elif predictor == "nupack":
        return _nupack_mfe_pk(seq)


def pk_predict_from_bpp(bpp, heuristic="hungarian", theta=None, allowed_buldge_len=0, min_len_helix=2,
                        exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0, prob_to_1_threshold_prior=1, ln=False, add_p_unpaired=True,
                        max_iter=1):
    '''
    Derive a structure from a base-pair probability matrix.

    threshknot options:
        theta
        max_iter
        allowed_buldge_len
        min_len_helix

    hungarian options:
        add_p_unpaired
        theta (aka prob_to_0_threshold_post)
        prob_to_0_threshold_prior
        prob_to_1_threshold_prior
        exp
        sigmoid_slope_factor
        ln
        allowed_buldge_len
        min_len_helix

    Returns:
        str: dot-bracket structure.

    Raises:
        ValueError: if heuristic is neither "threshknot" nor "hungarian".
    '''

    if heuristic not in ["threshknot", "hungarian"]:
        raise ValueError('Only threshknot and hunagrian heuristics implemented.')

    if heuristic == "threshknot":
        if theta is None:
            theta = 0.3
        return _threshknot(bpp, theta=theta, max_iter=max_iter, allowed_buldge_len=allowed_buldge_len, min_len_helix=min_len_helix)[0]
    elif heuristic == "hungarian":
        if theta is None:
            theta = 0.0
        # forward the caller's exponent (it used to be hard-coded to 1 here)
        return _hungarian(bpp, exp=exp, sigmoid_slope_factor=sigmoid_slope_factor, prob_to_0_threshold_prior=prob_to_0_threshold_prior,
                          prob_to_1_threshold_prior=prob_to_1_threshold_prior, theta=theta, ln=ln, add_p_unpaired=add_p_unpaired,
                          allowed_buldge_len=allowed_buldge_len, min_len_helix=min_len_helix)[0]


def _hungarian_pairs(bpp, exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0,
                     prob_to_1_threshold_prior=1, theta=0, ln=False, add_p_unpaired=True):
    """Select conflict-free base pairs from bpp via linear sum assignment.

    The input matrix is never modified. Returns a list of ``[i, j]`` pairs
    (i < j) whose untransformed probability exceeds ``theta``.
    """
    bpp_orig = bpp.copy()
    # work on a private copy: the diagonal is overwritten below and the
    # caller's matrix must stay untouched
    bpp = bpp.copy()

    if add_p_unpaired:
        p_unpaired = 1 - np.clip(np.sum(bpp, axis=0), 0, 1)
        for i, punp in enumerate(p_unpaired):
            bpp[i, i] = punp

    # apply prob_to_0 threshold and prob_to_1 threshold
    bpp = np.where(bpp < prob_to_0_threshold_prior, 0, bpp)
    bpp = np.where(bpp > prob_to_1_threshold_prior, 1, bpp)

    # apply exponential. On second thought this is likely not as helpful as sigmoid since
    # * for 0 < exp < 1 lower probs will increase more than higher ones (seems undesirable)
    # * for exp > 1 all probs will decrease, which seems undesirable (but at least lower probs decrease more than higher ones)
    bpp = np.power(bpp, exp)

    # apply log which follows Boltzmann where -ln(P) is proportional to energy
    if ln:
        # log(0) = -inf is expected here and clamped right below
        with np.errstate(divide='ignore'):
            bpp = np.log(bpp)

    bpp = np.where(np.isneginf(bpp), -1e10, bpp)
    bpp = np.where(np.isposinf(bpp), 1e10, bpp)

    # apply sigmoid modified by slope factor
    if sigmoid_slope_factor is not None and np.any(bpp):
        bpp = _sigmoid(bpp, slope_factor=sigmoid_slope_factor)

    # should think about order of above functions and possibly normalize again here

    # run hungarian algorithm to find base pairs
    _, row_pairs = linear_sum_assignment(-bpp)
    # Hungarian/linear sum assignment operates on a bipartite graph such that each row is assigned to
    # exactly one column and each column is assigned to exactly one row, however our case is not
    # bipartite. That means some chosen assignments could conflict with others, either creating
    # a "chain" (eg [(0,5), (5,10)]) or cycle (eg [(0,5), (5,10), (10, 0)]). We resolve these
    # conflicts by solving for the maximum weight independent set. (Note that if we have
    # two assignments like [(0,5) and (5,0)] we only need to deduplicate, hence the usage of set).
    bp_assignments = set(
        tuple(sorted((col, row)))
        for col, row in enumerate(row_pairs)
        if bpp_orig[col, row] > theta and col != row
    )
    bp_list = []
    while len(bp_assignments):
        bps = [bp_assignments.pop()]

        # Start building a chain to the "left"
        check_nt = bps[0][0]
        while conflict := next((bp for bp in bp_assignments if check_nt in bp), None):
            bps.insert(0, conflict)
            bp_assignments.remove(conflict)
            check_nt = next((nt for nt in conflict if nt != check_nt), None)
        # And to the "right"
        check_nt = bps[-1][1]
        while conflict := next((bp for bp in bp_assignments if check_nt in bp), None):
            bps.append(conflict)
            bp_assignments.remove(conflict)
            check_nt = next((nt for nt in conflict if nt != check_nt), None)

        if len(bps) == 1:
            bp_list.extend(bps)
        elif len(bps) > 2 and (bps[0][0] in bps[-1] or bps[0][1] in bps[-1]):
            # We have a cycle. We need to try both excluding the first element and excluding
            # the last element (only one or the other, or neither, can be present since they conflict)
            (bp_list_a, prob_a) = _max_weight_independent_set(bps[1:], bpp_orig)
            (bp_list_b, prob_b) = _max_weight_independent_set(bps[:-1], bpp_orig)
            if prob_a > prob_b:
                bp_list.extend(bp_list_a)
            else:
                bp_list.extend(bp_list_b)
        else:
            (bp_list_, _) = _max_weight_independent_set(bps, bpp_orig)
            bp_list.extend(bp_list_)

    return [list(bp) for bp in bp_list]


def _hungarian(bpp, exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0,
               prob_to_1_threshold_prior=1, theta=0, ln=False, add_p_unpaired=True,
               allowed_buldge_len=0, min_len_helix=2):
    """Hungarian heuristic: assignment-based pairing followed by helix post-processing.

    Returns:
        (structure, bp_list): dot-bracket string and the matching base-pair list.
    """
    bp_list = _hungarian_pairs(bpp, exp=exp, sigmoid_slope_factor=sigmoid_slope_factor,
                               prob_to_0_threshold_prior=prob_to_0_threshold_prior,
                               prob_to_1_threshold_prior=prob_to_1_threshold_prior,
                               theta=theta, ln=ln, add_p_unpaired=add_p_unpaired)
    bp_list = _check_bp_list(bp_list)
    structure = convert_bp_list_to_dotbracket(bp_list, bpp.shape[0])
    structure = post_process_struct(structure, allowed_buldge_len, min_len_helix)
    bp_list = convert_dotbracket_to_bp_list(structure, allow_pseudoknots=True)

    return structure, bp_list


def _max_weight_independent_set(pairs, probs):
    """Pick non-adjacent pairs of a conflict chain with maximal summed probability.

    ``pairs`` is an ordered chain in which neighbours conflict; ``probs`` is
    indexed as ``probs[i, j]``. On equal totals the previous best set is kept
    when the span of its first pair is not larger than the span of the current
    pair. Returns ``(chosen_pairs, total_probability)``; ``([], 0)`` for an
    empty chain.
    """
    max_sets = []
    for bp in pairs:
        bp_prob = probs[bp[0], bp[1]]

        if not max_sets:
            max_sets.append({'prob': bp_prob, 'bps': [bp]})
            continue

        # option 1: leave bp out and keep the best set found so far
        skip = max_sets[-1]
        # option 2: take bp on top of the best set that excludes its neighbour
        if len(max_sets) == 1:
            take = {'prob': bp_prob, 'bps': [bp]}
        else:
            take = {'prob': max_sets[-2]['prob'] + bp_prob, 'bps': [*max_sets[-2]['bps'], bp]}

        if skip['prob'] > take['prob']:
            max_sets.append(skip)
        elif take['prob'] > skip['prob']:
            max_sets.append(take)
        elif abs(skip['bps'][0][0] - skip['bps'][0][1]) <= abs(bp[0] - bp[1]):
            max_sets.append(skip)
        else:
            max_sets.append(take)

    if not max_sets:
        return ([], 0)
    return (max_sets[-1]['bps'], max_sets[-1]['prob'])


def _sigmoid(x, slope_factor=0.5):
    """Rescale x to roughly [-1, 1] and squash it with a logistic of the given slope."""
    # normalize to [-1, 1]
    numerator = (x - x.min()) * 2.0
    denominator = x.max() - x.min()
    # the small epsilon avoids a division by zero for constant input
    x = numerator / (denominator + 1e-6) - 1.0
    return 1 / (1 + np.exp(-x / slope_factor))


def _threshknot(bpp, theta=0.3, max_iter=1, allowed_buldge_len=0, min_len_helix=2):
    """ThreshKnot heuristic: keep pairs that are mutual row maxima above theta.

    Pairs found in one round are excluded from the next; at most ``max_iter``
    rounds contribute pairs.

    Returns:
        (structure, bp_list): dot-bracket string and the matching base-pair list.
    """
    iteration = 0
    length = bpp.shape[0]
    bp_list = []
    new_bp = 1
    while new_bp != 0 and iteration <= max_iter:
        current_bp_list = []
        # a set gives O(1) membership tests inside the double loop below
        paired = {nt for bp in bp_list for nt in bp}
        if paired:
            bpp_update = np.delete(bpp, sorted(paired), axis=1)
            # when nothing is left, the maxima of the previous round are reused
            if np.any(bpp_update):
                Pmax = np.amax(bpp_update, axis=1)
        else:
            Pmax = np.amax(bpp, axis=1)
        for i in range(length):
            if i in paired:
                continue
            for j in range(i + 1, length):
                if j in paired:
                    continue
                prob = bpp[i, j]
                if prob == Pmax[i] and prob == Pmax[j] and prob > theta:
                    current_bp_list.append([i, j])
        new_bp = len(current_bp_list)
        iteration += 1
        if new_bp != 0 and iteration > max_iter:
            print("Reached max iteration, stopping before converged.")
        else:
            bp_list.extend(current_bp_list)

    bp_list = _check_bp_list(bp_list)
    structure = convert_bp_list_to_dotbracket(bp_list, length)
    structure = post_process_struct(structure, allowed_buldge_len, min_len_helix)
    bp_list = convert_dotbracket_to_bp_list(structure, allow_pseudoknots=True)
    return structure, bp_list


def _check_bp_list(bp_list):
    """Normalise a base-pair list and drop duplicates and conflicting pairs.

    Each pair is sorted in place and the list is sorted by first index. When a
    nucleotide occurs in two pairs, the pair with the larger span is dropped
    (the later one on equal spans). A new list is returned; removals are
    tracked by index so the list is never modified while being iterated.
    """
    for bp in bp_list:
        bp.sort()
    bp_list.sort(key=lambda x: x[0])
    nts = [nt for bp in bp_list for nt in bp]
    if len(nts) <= len(set(nts)):
        return bp_list

    print("WARNING some nucletotides found in more than 1 bp")
    dropped = set()
    for i, bpA in enumerate(bp_list):
        if i in dropped:
            continue
        for j in range(i + 1, len(bp_list)):
            if j in dropped:
                continue
            bpB = bp_list[j]
            if bpA[0] == bpB[0] and bpA[1] == bpB[1]:
                print("removing repeat bp", bpA)
                dropped.add(j)
                continue
            if bpA[0] in bpB:
                shared = bpA[0]
            elif bpA[1] in bpB:
                shared = bpA[1]
            else:
                continue
            keep_a = abs(bpA[0] - bpA[1]) <= abs(bpB[0] - bpB[1])
            to_remove = bpB if keep_a else bpA
            print("WARNING base", shared, "is in 2 basepairs", bpA, bpB, "THIS SHOULD BE FIXED. Removing", to_remove)
            if keep_a:
                dropped.add(j)
            else:
                # bpA is gone: it must not knock out any further pairs
                dropped.add(i)
                break
    return [bp for k, bp in enumerate(bp_list) if k not in dropped]


def _run_hotknots(seq, model="DP", param="parameters_DP03.txt"):
    """Run HotKnots and return ``[[structure, second_column], ...]``.

    HotKnots is executed from inside its install directory; the previous
    working directory is always restored. On failure the error is printed and
    ``[["x" * len(seq), None]]`` is returned so ``result[0][0]`` still yields a
    full-length placeholder.
    """
    hotknot_location = package_locs["hotknots"]
    command = [f"{hotknot_location}/HotKnots", "-noPS", "-s", seq, "-m", model, "-p", f"{hotknot_location}/params/{param}"]
    cur_dir = getcwd()
    chdir(hotknot_location)
    try:
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
    finally:
        chdir(cur_dir)
    if p.returncode:
        print('ERROR: hotknots failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
        return [["x"*len(seq), None]]
    structs = []
    # the first two lines and the trailing empty line are skipped
    for line in out.decode().split("\n")[2:-1]:
        fields = line.split('\t')
        structs.append([fields[0].split(" ")[-1], fields[1]])
    return structs


def _ipknot_mfe(seq, model="LinearPartition-C", refinement=1, t1="auto", t2="auto"):
    """Run ipknot on seq and return the predicted dot-bracket string.

    On failure the error is printed and ``"x" * len(seq)`` is returned. The
    scratch folder is removed in every case.

    TODO
    -g, --gamma G         The weight for true base-pairs equivalent to
                          '-t 1/(gamma+1)'
    -i, --allow-isolated  Allow isolated base-pairs
    -P, --param FILE      Read the energy parameter file for Vienna RNA
                          package
    -x, --aux             Import an auxiliary file for base-pairing
                          probabilities
    -u, --no-levelwise    Do not perform the levelwise prediction
    -E, --energy          Output with the free energy
    """
    ipknot_location = package_locs["ipknot"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        fasta_file = f"{out_folder}/temp.fasta"
        with open(fasta_file, "w") as f:
            f.write(">seq \n")
            f.write(seq)
        command = [f"{ipknot_location}/ipknot", fasta_file, "--model", model, "-r", str(refinement), "-t", str(t1), "-t", str(t2)]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
    finally:
        _remove_folder(out_folder)
    if p.returncode:
        print('ERROR: ipknot failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
        return "x"*len(seq)
    # third output line carries the structure
    return out.decode().split("\n")[2]


def _knotty_mfe(seq):
    """Run Knotty on seq and return the structure re-encoded through the bp-list converters.

    On any failure the error is printed and ``"x" * len(seq)`` is returned.
    """
    knotty_location = package_locs["knotty"]
    command = [f"{knotty_location}/knotty", seq]
    p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, universal_newlines=True)
    try:
        out, err = p.communicate()
    except Exception:
        # best effort, but do not swallow KeyboardInterrupt/SystemExit
        print("ERROR knotty, could not communicate")
        return "x"*len(seq)
    if p.returncode:
        print('ERROR: knotty failed: on %s\n%s\n%s' % (seq, out, err))
        return "x"*len(seq)
    output = out.split("\n")
    struct = output[1].split(" ")[1]
    bp_list = convert_dotbracket_to_bp_list(struct, allow_pseudoknots=True)
    struct = convert_bp_list_to_dotbracket(bp_list, seq_len=len(struct))
    return struct


def _run_spotrna(seq, cpu=32):
    '''
    SPOT-RNA

    Returns:
        (structure, bpp). On failure the error is printed and
        ("x" * len(seq), None) is returned, so indexing the result is safe.
    '''
    spotrna_location = package_locs["spotrna"]
    spotrna_conda_env = package_locs["spotrna_conda_env"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        input_id = local_rand_filename()
        fasta_file = f"{out_folder}/{input_id}.fasta"
        with open(fasta_file, "w") as f:
            f.write(">seq\n")
            f.write(seq)
        command = [f"{spotrna_conda_env}/python3", f"{spotrna_location}/SPOT-RNA.py", "--inputs", fasta_file, "--outputs", out_folder, "--cpu", str(cpu)]
        # keep running until output file exists
        # NOTE(review): unbounded if the tool exits 0 without writing seq.bpseq
        while not path.exists(out_folder + "/seq.bpseq"):
            p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
            out, err = p.communicate()
            if p.returncode:
                print('ERROR: spotrna failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
                return "x"*len(seq), None
        bp_list = bpseq_to_bp_list(out_folder + "/seq.bpseq")
        struct = convert_bp_list_to_dotbracket(bp_list, len(seq))
        bpp = prob_to_bpp(out_folder + "/seq.prob")
        return struct, bpp
    finally:
        # removes the fasta input and every output file in one go
        _remove_folder(out_folder)


def _run_spotrna2(seq):
    """Run SPOT-RNA2 through ``run_spotrna2.sh``.

    Returns:
        (structure, bpp). On failure the error is printed and
        ("x" * len(seq), None) is returned.
    """
    # TODO
    spotrna2_location = package_locs["spotrna2"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        fasta_file = f"{out_folder}/temp.fasta"
        with open(fasta_file, "w") as f:
            f.write(">seq\n")
            f.write(seq)
        command = [f"{spotrna2_location}/run_spotrna2.sh", fasta_file]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
        if p.returncode:
            print('ERROR: spotrna2 failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
            return "x"*len(seq), None
        bp_list = bpseq_to_bp_list(f"{out_folder}/temp_outputs/temp.bpseq")
        struct = convert_bp_list_to_dotbracket(bp_list, len(seq))
        bpp = prob_to_bpp(f"{out_folder}/temp_outputs/temp.prob")
        return struct, bpp
    finally:
        # temp_outputs/ and temp_features/ live inside out_folder
        _remove_folder(out_folder)
def _e2efold_config(out_folder):
    """Build the e2efold configuration that points its I/O folders into out_folder."""
    return {
        "exp_name": "performance on short sequences (50-600)",
        "test_folder": f"{out_folder}/short_seqs",
        "save_folder": f"{out_folder}/short_cts",
        "gpu": "0",
        "u_net_d": 10,
        "BATCH_SIZE": 8,
        "batch_size_stage_1": 20,
        "batch_size_stage_2": 16,
        "OUT_STEP": 100,
        "LOAD_MODEL": True,
        "pp_steps": 20,
        "pp_loss": "f1",
        "pp_model": "mixed",
        "rho_per_position": "matrix",
        "data_type": "rnastralign_all_600",
        "model_type": "att_simple_fix",
        "epoches_first": 50,
        "epoches_second": 10,
        "epoches_third": 10,
        "evaluate_epi": 1,
        "evaluate_epi_stage_1": 5,
        "step_gamma": 1,
        "k": 1,
        "test": {
            "f1": True,
            "accuracy": False,
            "energy": False,
        },
    }


def _e2efold(seq):
    """Run e2efold (short-sequence model) on seq and return a dot-bracket string.

    The scratch folder is removed even if the run or the parsing fails.
    """
    # only if <600
    # TODO probably plenty of options
    import json
    import shutil

    e2efold_location = package_locs["e2efold"]
    e2efold_conda_env = package_locs["e2efold_conda_env"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        # json.dump escapes the folder path correctly, unlike string concatenation
        with open(f'{out_folder}/config.json', 'w') as f:
            json.dump(_e2efold_config(out_folder), f, indent=4)
        mkdir(f'{out_folder}/short_seqs')
        mkdir(f'{out_folder}/short_cts')
        command = [f"{e2efold_conda_env}/python", f"{e2efold_location}/e2efold_productive_short.py", "-c", f"{out_folder}/config.json"]
        fasta_file = f"{out_folder}/short_seqs/temp.seq"
        with open(fasta_file, "w") as f:
            f.write(seq)
        ct_file = f"{out_folder}/short_cts/temp.seq.ct"
        # keep running until output file exists
        # NOTE(review): unbounded if e2efold never writes the ct file
        while not path.exists(ct_file):
            out, err = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE).communicate()
        bp_list = ct_to_bp_list(ct_file, 1)
        return convert_bp_list_to_dotbracket(bp_list, len(seq))
    finally:
        shutil.rmtree(out_folder, ignore_errors=True)


def _pknots(seq):
    ''' Run PKNOTS (``-k -g``) on seq and return a dot-bracket string.

    On failure the error is printed and ``"x" * len(seq)`` is returned. The
    scratch folder is removed in every case.

    TODO
    -a : pseudoknot approx, exclude V7-V10 and WB9-WB1
    -c : add L^5 coaxials (V6)
    -s : shuffle sequences
    '''
    import shutil

    pknots_location = package_locs["pknots"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        fasta_file = f"{out_folder}/temp.fasta"
        with open(fasta_file, "w") as f:
            f.write(">seq \n")
            f.write(seq)
        outfile = f"{out_folder}/out.out"
        command = [pknots_location + "/pknots", "-k", "-g", fasta_file, outfile]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
        if p.returncode:
            print('ERROR: PKNOTS failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
            return "x"*len(seq)
        bp_list = ct_to_bp_list(outfile, 4)
    finally:
        shutil.rmtree(out_folder, ignore_errors=True)
    return convert_bp_list_to_dotbracket(bp_list, len(seq))


def _nupack_mfe_pk(seq):
    """Run NUPACK ``mfe -pseudo`` on seq and return the structure line of its .mfe file.

    On failure the decoded stdout/stderr are printed and ``"x" * len(seq)`` is
    returned. The scratch folder is removed in every case.
    """
    # TODO many nupack options... also why is this not implemented in mfe?
    import shutil

    nupack_location = package_locs['nupack']
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        fasta_file = f"{out_folder}/temp"
        with open(f'{fasta_file}.in', 'w') as f:
            f.write(seq)
        command = [nupack_location+'/mfe', "-pseudo", fasta_file]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
        if p.returncode:
            # decode() must be called; the bare attribute printed a method repr
            print(f'ERROR: nupack mfe pk failed on {seq} {fasta_file} {out.decode()} {err.decode()}')
            return 'x'*len(seq)
        with open(f'{fasta_file}.mfe') as f:
            # the structure is read from line 17 of the .mfe file, minus its newline
            struct = f.readlines()[16][:-1]
    finally:
        shutil.rmtree(out_folder, ignore_errors=True)
    return struct


def sample_structures(seq, n_samples = 10, package='vienna_2', T=37, constraint=None, param_file=None,
                      dangles=True, reweight=None, nonredundant=False):
    ''' Draw stochastic sampled structures for RNA sequence. Possible packages: 'eternafold', 'vienna_2'

    Args:
        seq (str): nucleic acid sequence
        n_samples (int): number of structures to sample
        package (str): '<name>' or '<name>_<version>'
        T (float): temperature (Celsius); only forwarded to vienna
        constraint (str): structure constraints
        param_file (str): parameter file; only forwarded to eternafold
        dangles (bool): dangles or not; only forwarded to vienna
        reweight: forwarded to vienna as its ``--commands`` file
        nonredundant (bool): forwarded to both samplers

    Returns
        list of structures

    Raises:
        ValueError: if the package is neither vienna nor eternafold.
    '''

    try:
        pkg, version = package.lower().split('_')
    except ValueError:
        # no (or more than one) underscore: treat the whole string as the name
        pkg, version = package.lower(), None

    if not dangles and pkg not in ['vienna','nupack']:
        print('Warning: %s does not support dangles options' % pkg)

    if pkg=='vienna':
        struct_list = sample_vienna_(seq, n_samples=n_samples, version=version, T=T,
                                     dangles=dangles, constraint=constraint, reweight=reweight, nonredundant = nonredundant)

    elif pkg=='eternafold':
        struct_list = sample_eternafold_(seq, n_samples=n_samples, param_file=param_file, constraint=constraint, nonredundant = nonredundant)

    else:
        raise ValueError('package %s either not understood or not supported at this moment.' % package)

    return struct_list


def sample_vienna_(seq, n_samples=10, T=37, version='2', constraint=None,
                   dangles=True, reweight=None, nonredundant=False):
    """Stochastically sample structures from Vienna RNAsubopt.

    Inputs:
        seq (str): nucleic acid sequence
        n_samples (int): number of structures to sample.
        T (float): temperature
        version (str): '1...' or '2...' selects the Vienna install; empty/None means '2'
        constraint (str): structure constraints
        dangles (bool): if False, ``--dangles=0`` is passed
        reweight: if given, passed as ``--commands=<reweight>``
        nonredundant (bool): if True, ``-N`` is passed
    Outputs:
        struct_list (list): list of stochastically-sampled structures.
    Raises:
        RuntimeError: unknown version, or RNAsubopt reported an omitted constraint.
        Exception: RNAsubopt exited with a non-zero status.
    """

    if not version:
        version='2'

    if version.startswith('2'):
        LOC=package_locs['vienna_2']
    elif version.startswith('1'):
        LOC=package_locs['vienna_1']
    else:
        raise RuntimeError('Error, vienna version %s not present' % version)

    command = ['%s/RNAsubopt' % LOC, '-T', str(T), '--stochBT_en=%d' % n_samples]

    if constraint is not None:
        fname = write([seq, constraint])
        command.append('-C')
    else:
        fname = write([seq])

    if not dangles:
        command.append('--dangles=0')

    if nonredundant:
        command.append('-N')

    if reweight is not None:
        command.append('--commands=%s' % reweight)

    try:
        with open(fname) as f:
            if DEBUG: print(fname)
            if DEBUG: print(' '.join(command))
            p = sp.Popen(command, stdin=f, stdout=sp.PIPE, stderr=sp.PIPE)
            stdout, stderr = p.communicate()
    finally:
        # the input file is removed even when RNAsubopt cannot be started
        os.remove(fname)

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)

    if p.returncode:
        raise Exception('RNAsubopt failed: on %s\n%s' % (seq, stderr))

    if 'omitting constraint' in stderr.decode('utf-8'):
        raise RuntimeError("Constraint omitted, Impossible structure")

    # first line is just repeating sequence, last is empty space
    output_lines = stdout.decode('utf-8').split('\n')[1:-1]
    return [line.split(' ')[0] for line in output_lines]


def sample_eternafold_(seq, n_samples=10, param_file=None, constraint=None, nonredundant=False):
    """Stochastically sample structures from EternaFold.

    Inputs:
        seq (str): nucleic acid sequence
        n_samples (int): accepted for interface parity; not passed to the command
        param_file (str): parameter file; defaults to package_locs['eternafoldparams']
        constraint (str): structure constraints
        nonredundant (bool): accepted for interface parity; not passed to the command
    Outputs:
        struct_list (list): list of stochastically-sampled structures.
    Raises:
        Exception: the sampler exited with a non-zero status.
    """

    fname = '%s.in' % filename()
    LOC=package_locs['eternafold']

    command = ['%s/contrafold' % LOC, 'sample', fname]

    if param_file is not None:
        command = command + ['--params', param_file]
    else:
        command = command + ['--params', package_locs['eternafoldparams']]

    try:
        if constraint is not None:
            convert_dbn_to_contrafold_input(seq, constraint, fname)
            command.append('--constraints')
        else:
            convert_dbn_to_contrafold_input(seq, ''.join(['.' for x in range(len(seq))]), fname)

        if DEBUG: print(' '.join(command))

        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)

        stdout, stderr = p.communicate()
    finally:
        # the input file is removed on failure as well
        if os.path.exists(fname):
            os.remove(fname)

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)
    if p.returncode:
        raise Exception('Eternafold sample failed: on %s\n%s' % (seq, stderr))

    return stdout.decode('utf-8').split('\n')[:-1]
def _heatmap_layout(data, padded_size=18.0, dpi=100.0):
    """Return ``(rows, (fig_width, fig_height))`` for the stacked heatmap figure.

    Rows: one for the sequence, one per reactivity track, one per prediction
    and one more when a "control_structure" entry is present. Width scales
    with the sequence length, height with the number of rows.
    """
    scaling_ratio = padded_size / dpi
    rows = 1 + len(data["reactivity"]) + len(data["predictions"])
    if "control_structure" in data:
        rows += 1
    return rows, (len(data["sequence"]) * scaling_ratio, rows * scaling_ratio)


def plot_structure_heatmap(data):
    """Plot a sequence with reactivity heatmaps and structure strings stacked below it.

    Args:
        data (dict): reads the keys
            "title" (str), "sequence" (str),
            "reactivity" (dict: label -> {"data", "BLANK_OUT5", "BLANK_OUT3"}),
            "predictions" (dict: predictor name -> structure string) and,
            optionally, "control_structure" ({"start_index", "structure"}).

    Returns:
        The matplotlib figure, or None when the control structure entry fails
        the type check below.

    Side effects:
        Sets ``plt.rcParams["font.family"]`` to "monospace" globally.
    """
    # Set default text to monospace to align sequence and structures
    plt.rcParams["font.family"] = "monospace"

    rows, figsize = _heatmap_layout(data)
    seq_len = len(data["sequence"])

    # squeeze=False keeps a 2-D array of axes even for a single row, so the
    # per-axis loops below also work when only the sequence is plotted
    fig, axs = plt.subplots(
        nrows=rows,
        sharex=True,
        figsize=figsize,
        squeeze=False,
    )
    axs = axs[:, 0]

    # SETUP COMMON TO ALL AXES
    # Normalize for data range from -2 to 2
    norm = plt.Normalize(-2, 2)
    for ax in axs:
        # Turn off the axis bounding box
        ax.spines[:].set_visible(False)
        # Set all axes x_limits from 0 to the length of the sequence
        ax.set_xlim([0, seq_len])
        # Hide the tick marks on both axes
        ax.tick_params(axis='x', length=0)
        ax.set_yticks([])
        ax.tick_params(axis='y', length=0, pad=6)
        # Plot blank image data (simplifies alignment of text in sequence and prediction plotting)
        ax.imshow([[0]*(seq_len+1)], cmap="bwr", norm=norm)
    # Collapse sub-plot spacing
    fig.subplots_adjust(hspace=0)

    # Set the plot title
    axs[0].set_title(data["title"], fontweight='bold')

    ################################################
    # PLOTTING
    ################################################
    ax_index = 0

    # Plot sequence text
    axs[0].set_yticks([0], labels=["sequence"])
    for i, char in enumerate(data["sequence"]):
        axs[0].text(i, 0, char, fontfamily='monospace', ha="center", va="center")

    # Create the reactivity data heatmap
    pos = None
    for data_label in data["reactivity"]:
        ax_index += 1
        axs[ax_index].set_yticks([0], labels=[data_label])
        reactivity = data["reactivity"][data_label]["data"]
        BLANK_OUT5 = data["reactivity"][data_label]["BLANK_OUT5"]
        BLANK_OUT3 = data["reactivity"][data_label]["BLANK_OUT3"]

        # Reactivity needs to be a list of numbers (an empty list is tolerated)
        if not isinstance(reactivity, list) or (len(reactivity) > 0 and not isinstance(reactivity[0], float)):
            print("WARNING: reactivity data in unexpected format")

        # If the data has blank out regions, add them. list() keeps this a
        # concatenation even for array-like input.
        display_data = [-1.0] * BLANK_OUT5 + list(reactivity) + [-1.0] * BLANK_OUT3

        # Plot the heatmap and the blank out regions
        pos = axs[ax_index].imshow([display_data], cmap="bwr", norm=norm)
        blank5 = patches.Rectangle((-0.5, -0.5), BLANK_OUT5, 1, color="gray")
        axs[ax_index].add_patch(blank5)
        blank3 = patches.Rectangle((len(display_data)-BLANK_OUT3-0.5, -0.5), BLANK_OUT3, 1, color="gray")
        axs[ax_index].add_patch(blank3)

    # one shared colorbar, only when at least one heatmap was drawn
    if pos is not None:
        plt.colorbar(pos, ax=axs)

    # Plot control structure (if provided)
    if "control_structure" in data:
        # NOTE(review): kept as in the original; this bails out (returning None)
        # only when start_index is not a float AND structure is not a str —
        # confirm the intended validation.
        if type(data["control_structure"]["start_index"]) != float and type(data["control_structure"]["structure"]) != str:
            return
        ax_index += 1
        axs[ax_index].set_yticks([0], labels=["Control Structure"])
        start_index = data["control_structure"]["start_index"]
        for i, char in enumerate(data["control_structure"]["structure"]):
            axs[ax_index].text(i+start_index, 0, char, fontfamily='monospace', ha="center", va="center")

    # Plot predictions text
    for predictor_name in data["predictions"]:
        ax_index += 1
        axs[ax_index].set_yticks([0], labels=[predictor_name])
        for i, char in enumerate(data["predictions"][predictor_name]):
            axs[ax_index].text(i, 0, char, fontfamily='monospace', ha="center", va="center")
        # only the bottom row shows x tick marks
        if ax_index == rows-1:
            axs[ax_index].tick_params(axis='x', length=4, direction='out')

    return fig
4 | 5 | hotknots = "((((((....(((((((......[[[[[..))))))).....(((((....{{{{{..)))))...]]]]]..))))))..((((.(((((((...))))))).))))...}}}}}..." 6 | ipknot = "[[[[[[....[[[..........(((((((((((]]].((((.(((.((......)).))).)))).......]]]]]].......(((((((...)))))))..))))))).)))).." 7 | knotty = "(((..[[[[[))).]].]]]...[[[[[[[[[[[[...[[[[.[[[.[[......]].]]].]]]]((.(((....))).))....(((((((...))))))).]]]]]]]].]]]].." 8 | spotrna = ".(((((((....(((((...((.....[[.)))))))]](((.((((((((....)))).)))))))......))))))).......((((((((.))))))))(.........)...." 9 | e2efold = "......(............((...(.......(.............)..........)).)........(.(......(.(.(.([...).]).)...)....).).....)......." 10 | pknots = ".(((.......))).((.....))(((((((((((((((((((.((((((......)).))))))))...(((....))).)))...((((((.....)))))).)))))))).))))." 11 | empty = "." * len(samiv_struct) 12 | 13 | 14 | def test_is_pk(): 15 | assert(is_PK(samiv_struct)) 16 | assert(not is_PK(pknots)) 17 | 18 | 19 | def test_compare_struct(): 20 | assert(compare_structure_to_native(hotknots, samiv_struct, metric="PPV") == 0.8205128205128205) 21 | assert(compare_structure_to_native(hotknots, samiv_struct, metric="sensitivity") == 0.8) 22 | assert(compare_structure_to_native(hotknots, samiv_struct, metric="F1_score") == 0.810126582278481) 23 | assert(compare_structure_to_native(hotknots, samiv_struct, metric="all")["F1_score"] == 0.810126582278481) 24 | assert(compare_structure_to_native(empty, samiv_struct, metric="all")["F1_score"] == 0) 25 | assert(compare_structure_to_native(hotknots, samiv_struct, metric="all", PK_involved=True)["F1_score"] == 0.7796610169491526) 26 | assert(compare_structure_to_native(hotknots, samiv_struct, metric="all", PK_involved=False)["F1_score"] == 0.9) 27 | 28 | 29 | def test_compare_structs(): 30 | assert(0.4266666666666667 == compare_structures_to_natives([hotknots, spotrna], [samiv_struct, samiv_struct], comparison="basepairs")['PPV']) 31 | assert(1.0 == compare_structures_to_natives([hotknots, 
spotrna], [samiv_struct, samiv_struct], comparison="is_PK")["F1_score"]) 32 | assert(1.0 == compare_structures_to_natives([hotknots, spotrna, pknots, empty], [samiv_struct, samiv_struct, samiv_struct, samiv_struct], comparison="is_PK", metric="PPV")) 33 | assert(0.5 == compare_structures_to_natives([hotknots, spotrna, pknots, empty], [samiv_struct, samiv_struct, samiv_struct, samiv_struct], comparison="is_PK", metric="sensitivity")) 34 | assert(0.6666666666666666 == compare_structures_to_natives([hotknots, spotrna, pknots, empty], [samiv_struct, samiv_struct, samiv_struct, samiv_struct], comparison="is_PK", metric="F1_score")) 35 | assert(0.25 == compare_structures_to_natives([hotknots, spotrna, pknots, empty], [samiv_struct, samiv_struct, samiv_struct, samiv_struct], comparison="non_PK_basepairs")["sensitivity"]) 36 | assert(compare_structures_to_natives([hotknots, spotrna, pknots, empty], [samiv_struct, samiv_struct, samiv_struct, samiv_struct], comparison="PK_basepairs")["F1_score"] == 0.28571428571428575) 37 | 38 | 39 | if __name__ == '__main__': 40 | test_is_pk() 41 | test_compare_struct() 42 | test_compare_structs() 43 | -------------------------------------------------------------------------------- /tests/test_file_readers.py: -------------------------------------------------------------------------------- 1 | from arnie.utils import * 2 | 3 | bp_list = [[1, 53], [2, 52], [3, 51], [4, 50]] 4 | bpseq_file = "test_files/seq.bpseq" 5 | ct_file = "test_files/seq.ct" 6 | prob = [[0.000000000000000000e+00, 0.000000000000000000e+00, 1.763635280966738210e-08, 3.609733185304499478e-10, 1.899559890269620151e-08, 2.357239330957941875e-08, 4.468964148624955998e-08, 1.829342375496644303e-07, 4.127057055650768230e-06, 6.457873937490533129e-06, 5.384244594784908830e-06, 2.036780995975695890e-06, 8.030412357417303880e-05, 9.388004334808641715e-01], 7 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 7.471370797708187664e-11, 
1.984224746020934790e-09, 1.454165406835343696e-07, 7.498508237767878982e-07, 3.865106868873978322e-06, 3.536102881585034987e-05, 2.394675941956625451e-05, 1.153705070502445834e-05, 2.612278010790368161e-04, 9.977645040109642816e-01, 3.237492435080424613e-02], 8 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.888956084283119840e-07, 5.413485565351087927e-07, 1.541276902667509429e-06, 1.126288121100931219e-05, 1.173869736654523994e-04, 7.325411664786217837e-06, 4.003005106708449522e-05, 9.983489958892994842e-01, 6.129986080705812287e-03, 1.054258712935700630e-04], 9 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.665249372829090312e-07, 9.497187209883291010e-06, 1.414039620915108573e-04, 3.243636730206535555e-05, 1.567102777497324565e-05, 9.975833150208464062e-01, 5.034934175522413902e-03, 4.125712961733680345e-04, 2.274785721191600433e-07], 10 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.738593674907211730e-05, 8.545859784264292502e-05, 3.527927141337322309e-04, 9.980848491909390940e-01, 5.202587741165264415e-03, 1.251151206947209791e-04, 8.513900403421354130e-05, 2.669391452879514996e-07], 11 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 4.533978023581152796e-03, 6.727278019311770663e-02, 6.385230293170572440e-04, 1.043096878283355517e-05, 5.352860417885025332e-05, 2.197980140741698389e-06, 7.290393039203351602e-07], 12 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 2.247207617456368913e-06, 
1.250804117783563030e-06, 1.540204172157810605e-05, 6.227562675838443224e-05, 4.136123245159868602e-06, 1.480250050762932428e-06], 13 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 5.317539514120537016e-08, 1.466432823612201115e-06, 4.843728467860043972e-05, 9.024276054982824927e-06, 5.084605728081093467e-07], 14 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.142360160134971489e-07, 2.745012688262551338e-05, 2.086567406564575274e-05, 2.143480176383162954e-06], 15 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.219164078076087741e-06, 1.118464018225010500e-05, 7.100719490463872321e-06], 16 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 4.536954583383731369e-06, 9.361786362639244079e-06], 17 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.650802739207468036e-06], 18 | 
[0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00], 19 | [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00]] 20 | prob_file = "test_files/seq.prob" 21 | 22 | 23 | def test_file_converters(): 24 | assert(bpseq_to_bp_list(bpseq_file, header_length=1) == bp_list) 25 | assert(ct_to_bp_list(ct_file, header_length=2) == bp_list) 26 | assert(prob_to_bpp(prob_file).tolist() == prob) 27 | assert(bpseq_to_bp_list(bpseq_file, header_length=3) != bp_list) 28 | assert(ct_to_bp_list(ct_file, header_length=4) != bp_list) 29 | 30 | if __name__ == '__main__': 31 | test_file_converters() 32 | -------------------------------------------------------------------------------- /tests/test_files/seq.bpseq: -------------------------------------------------------------------------------- 1 | #seq 2 | 1 A 0 3 | 2 C 54 4 | 3 A 53 5 | 4 G 52 6 | 5 C 51 7 | 6 U 0 8 | 7 A 0 9 | 8 C 0 10 | 9 G 0 11 | 10 U 0 12 | 11 C 0 13 | 12 A 0 14 | 13 G 0 15 | 14 U 0 16 | 15 G 0 17 | 16 C 0 18 | 17 A 0 19 | 18 G 0 20 | 19 U 0 21 | 20 A 0 22 | 21 C 0 23 | 22 G 0 24 | 23 G 0 25 | 24 G 0 26 | 25 C 0 27 | 26 C 0 28 | 27 C 0 29 | 28 C 0 30 | 29 C 0 31 | 30 C 0 32 | 31 C 0 33 | 32 C 0 34 | 33 C 0 35 | 34 C 0 36 | 35 C 0 37 | 36 U 0 38 | 37 U 0 39 | 38 U 0 40 | 39 U 0 41 | 40 U 0 42 | 41 U 0 43 | 42 U 0 44 | 43 A 0 45 | 44 C 0 46 | 45 G 0 47 | 46 U 0 48 | 47 C 0 49 | 48 G 0 50 | 49 A 0 51 | 50 U 0 52 
| 51 G 5 53 | 52 C 4 54 | 53 U 3 55 | 54 G 2 56 | -------------------------------------------------------------------------------- /tests/test_files/seq.ct: -------------------------------------------------------------------------------- 1 | 54 seq SPOT-RNA output 2 | 3 | 1 A 0 2 0 1 4 | 2 C 1 3 54 2 5 | 3 A 2 4 53 3 6 | 4 G 3 5 52 4 7 | 5 C 4 6 51 5 8 | 6 U 5 7 0 6 9 | 7 A 6 8 0 7 10 | 8 C 7 9 0 8 11 | 9 G 8 10 0 9 12 | 10 U 9 11 0 10 13 | 11 C 10 12 0 11 14 | 12 A 11 13 0 12 15 | 13 G 12 14 0 13 16 | 14 U 13 15 0 14 17 | 15 G 14 16 0 15 18 | 16 C 15 17 0 16 19 | 17 A 16 18 0 17 20 | 18 G 17 19 0 18 21 | 19 U 18 20 0 19 22 | 20 A 19 21 0 20 23 | 21 C 20 22 0 21 24 | 22 G 21 23 0 22 25 | 23 G 22 24 0 23 26 | 24 G 23 25 0 24 27 | 25 C 24 26 0 25 28 | 26 C 25 27 0 26 29 | 27 C 26 28 0 27 30 | 28 C 27 29 0 28 31 | 29 C 28 30 0 29 32 | 30 C 29 31 0 30 33 | 31 C 30 32 0 31 34 | 32 C 31 33 0 32 35 | 33 C 32 34 0 33 36 | 34 C 33 35 0 34 37 | 35 C 34 36 0 35 38 | 36 U 35 37 0 36 39 | 37 U 36 38 0 37 40 | 38 U 37 39 0 38 41 | 39 U 38 40 0 39 42 | 40 U 39 41 0 40 43 | 41 U 40 42 0 41 44 | 42 U 41 43 0 42 45 | 43 A 42 44 0 43 46 | 44 C 43 45 0 44 47 | 45 G 44 46 0 45 48 | 46 U 45 47 0 46 49 | 47 C 46 48 0 47 50 | 48 G 47 49 0 48 51 | 49 A 48 50 0 49 52 | 50 U 49 51 0 50 53 | 51 G 50 52 5 51 54 | 52 C 51 53 4 52 55 | 53 U 52 54 3 53 56 | 54 G 53 0 2 54 57 | -------------------------------------------------------------------------------- /tests/test_files/seq.prob: -------------------------------------------------------------------------------- 1 | 0.000000000000000000e+00 0.000000000000000000e+00 1.763635280966738210e-08 3.609733185304499478e-10 1.899559890269620151e-08 2.357239330957941875e-08 4.468964148624955998e-08 1.829342375496644303e-07 4.127057055650768230e-06 6.457873937490533129e-06 5.384244594784908830e-06 2.036780995975695890e-06 8.030412357417303880e-05 9.388004334808641715e-01 2 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
7.471370797708187664e-11 1.984224746020934790e-09 1.454165406835343696e-07 7.498508237767878982e-07 3.865106868873978322e-06 3.536102881585034987e-05 2.394675941956625451e-05 1.153705070502445834e-05 2.612278010790368161e-04 9.977645040109642816e-01 3.237492435080424613e-02 3 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.888956084283119840e-07 5.413485565351087927e-07 1.541276902667509429e-06 1.126288121100931219e-05 1.173869736654523994e-04 7.325411664786217837e-06 4.003005106708449522e-05 9.983489958892994842e-01 6.129986080705812287e-03 1.054258712935700630e-04 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.665249372829090312e-07 9.497187209883291010e-06 1.414039620915108573e-04 3.243636730206535555e-05 1.567102777497324565e-05 9.975833150208464062e-01 5.034934175522413902e-03 4.125712961733680345e-04 2.274785721191600433e-07 5 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.738593674907211730e-05 8.545859784264292502e-05 3.527927141337322309e-04 9.980848491909390940e-01 5.202587741165264415e-03 1.251151206947209791e-04 8.513900403421354130e-05 2.669391452879514996e-07 6 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 4.533978023581152796e-03 6.727278019311770663e-02 6.385230293170572440e-04 1.043096878283355517e-05 5.352860417885025332e-05 2.197980140741698389e-06 7.290393039203351602e-07 7 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 2.247207617456368913e-06 1.250804117783563030e-06 1.540204172157810605e-05 6.227562675838443224e-05 
4.136123245159868602e-06 1.480250050762932428e-06 8 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 5.317539514120537016e-08 1.466432823612201115e-06 4.843728467860043972e-05 9.024276054982824927e-06 5.084605728081093467e-07 9 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.142360160134971489e-07 2.745012688262551338e-05 2.086567406564575274e-05 2.143480176383162954e-06 10 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.219164078076087741e-06 1.118464018225010500e-05 7.100719490463872321e-06 11 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 4.536954583383731369e-06 9.361786362639244079e-06 12 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.650802739207468036e-06 13 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 14 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 15 | -------------------------------------------------------------------------------- /tests/test_helix_getting_and_removing.py: -------------------------------------------------------------------------------- 1 | from arnie.utils import * 2 | 3 | 4 | s = "(((....)).)...(..)....(((..(((....))))))" 5 | s_0_2 = ".((....)).............(((..(((....))))))" 6 | s_0_3 = "......................(((..(((....))))))" 7 | s_1_3 = "(((....)).)...........(((..(((....))))))" 8 | s_2_3 = "(((....)).)...........(((..(((....))))))" 9 | s_0_4 = "........................................" 10 | s_1_4 = "........................................" 11 | s_2_4 = "......................(((..(((....))))))" 12 | s_1_2 = "(((....)).)...........(((..(((....))))))" 13 | 14 | s_all_helices = [[[0, 10]], 15 | [[1, 8], [2, 7]], 16 | [[14, 17]], 17 | [[22, 39], [23, 38], [24, 37]], 18 | [[27, 36], [28, 35], [29, 34]]] 19 | s_1_helices = [[[0, 10], [1, 8], [2, 7]], 20 | [[14, 17]], 21 | [[22, 39], [23, 38], [24, 37]], 22 | [[27, 36], [28, 35], [29, 34]]] 23 | s_2_helices = [[[0, 10], [1, 8], [2, 7]], 24 | [[14, 17]], 25 | [[22, 39], [23, 38], [24, 37], [27, 36], [28, 35], [29, 34]]] 26 | 27 | 28 | pk = "(((.((([..[[..))))((...)){...]]]...)})" 29 | pk_0_2 = "....(((...[[..))).((...))....]]......." 30 | pk_0_3 = "....(((.......)))....................." 31 | pk_1_3 = "..(.(((.......))))...................." 32 | pk_2_3 = "..(.((([..[[..))))...........]]]......" 
33 | pk_0_4 = "......................................" 34 | pk_1_4 = "..(.(((.......))))...................." 35 | pk_2_4 = "..(.(((.......))))...................." 36 | pk_1_2 = "(((.(((...[[..))))((...))....]]....).)" 37 | 38 | pk_all_helices = [[[0, 37]], 39 | [[1, 35]], 40 | [[2, 17]], 41 | [[4, 16], [5, 15], [6, 14]], 42 | [[7, 31]], 43 | [[10, 30], [11, 29]], 44 | [[18, 24], [19, 23]], 45 | [[25, 36]]] 46 | pk_1_helices = [[[0, 37], [1, 35]], 47 | [[2, 17], [4, 16], [5, 15], [6, 14]], 48 | [[7, 31]], 49 | [[10, 30], [11, 29]], 50 | [[18, 24], [19, 23]], 51 | [[25, 36]]] 52 | pk_2_helices = [[[0, 37], [1, 35]], 53 | [[2, 17], [4, 16], [5, 15], [6, 14]], 54 | [[7, 31], [10, 30], [11, 29]], 55 | [[18, 24], [19, 23]], 56 | [[25, 36]]] 57 | 58 | 59 | def test_getting_helix(): 60 | assert(get_helices(s, allowed_buldge_len=0) == s_all_helices) 61 | assert(get_helices(pk, allowed_buldge_len=0) == pk_all_helices) 62 | assert(get_helices(s, allowed_buldge_len=1) == s_1_helices) 63 | assert(get_helices(pk, allowed_buldge_len=1) == pk_1_helices) 64 | assert(get_helices(s, allowed_buldge_len=2) == s_2_helices) 65 | assert(get_helices(pk, allowed_buldge_len=2) == pk_2_helices) 66 | 67 | 68 | def test_removing_helix(): 69 | assert(post_process_struct(s, allowed_buldge_len=0, min_len_helix=1) == s) 70 | # note PKs may swap around their bracket types so fairest to compare bp_list always! 
71 | assert(convert_dotbracket_to_bp_list(post_process_struct(pk, allowed_buldge_len=0, min_len_helix=1), len(pk)) == convert_dotbracket_to_bp_list(pk, len(pk))) 72 | assert(post_process_struct(s, allowed_buldge_len=0, min_len_helix=2) == s_0_2) 73 | assert(post_process_struct(pk, allowed_buldge_len=0, min_len_helix=2) == pk_0_2) 74 | assert(post_process_struct(s, allowed_buldge_len=0, min_len_helix=3) == s_0_3) 75 | assert(post_process_struct(pk, allowed_buldge_len=0, min_len_helix=3) == pk_0_3) 76 | assert(post_process_struct(s, allowed_buldge_len=1, min_len_helix=3) == s_1_3) 77 | assert(post_process_struct(pk, allowed_buldge_len=1, min_len_helix=3) == pk_1_3) 78 | assert(post_process_struct(s, allowed_buldge_len=2, min_len_helix=3) == s_2_3) 79 | assert(post_process_struct(pk, allowed_buldge_len=2, min_len_helix=3) == pk_2_3) 80 | assert(post_process_struct(s, allowed_buldge_len=0, min_len_helix=4) == s_0_4) 81 | assert(post_process_struct(pk, allowed_buldge_len=0, min_len_helix=4) == pk_0_4) 82 | assert(post_process_struct(s, allowed_buldge_len=1, min_len_helix=4) == s_1_4) 83 | assert(post_process_struct(pk, allowed_buldge_len=1, min_len_helix=4) == pk_1_4) 84 | assert(post_process_struct(s, allowed_buldge_len=2, min_len_helix=4) == s_2_4) 85 | assert(post_process_struct(pk, allowed_buldge_len=2, min_len_helix=4) == pk_2_4) 86 | assert(post_process_struct(pk, allowed_buldge_len=1, min_len_helix=2) == pk_1_2) 87 | assert(post_process_struct(s, allowed_buldge_len=1, min_len_helix=2) == s_1_2) 88 | 89 | if __name__ == '__main__': 90 | test_getting_helix() 91 | test_removing_helix() 92 | -------------------------------------------------------------------------------- /tests/test_linearpartition.py: -------------------------------------------------------------------------------- 1 | from arnie.free_energy import free_energy 2 | from arnie.mfe import mfe 3 | 4 | seq = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG' 5 | dG = free_energy(seq, linear=True, 
DEBUG=True) 6 | print(dG) 7 | 8 | dG = free_energy(seq, linear=True, package='contrafold', DEBUG=True) 9 | print(dG) 10 | 11 | dG = free_energy(seq, linear=True, package='eternafold', DEBUG=True) 12 | print(dG) 13 | 14 | struct = mfe(seq, linear=True) 15 | print(struct) 16 | struct = mfe(seq, linear=True, package='contrafold') 17 | print(struct) 18 | struct = mfe(seq, linear=True, package='eternafold') 19 | print(struct) 20 | 21 | -------------------------------------------------------------------------------- /tests/test_pfunc.py: -------------------------------------------------------------------------------- 1 | from arnie.pfunc import pfunc 2 | from arnie.utils import load_package_locations 3 | 4 | sample_seq = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG' 5 | 6 | 7 | def test_pfunc(package): 8 | 9 | Z = pfunc(sample_seq, package=package) 10 | print('test %s' % package, Z) 11 | return 12 | 13 | 14 | if __name__ == '__main__': 15 | package_locs = load_package_locations() 16 | for pkg in sorted(package_locs.keys()): 17 | 18 | if (pkg == 'TMP') or ( 19 | pkg.startswith('linear')) or ( 20 | pkg in ['hotknots', 'ipknot', 'knotty', 'pknots', 'spotrna', 21 | 'spotrna_conda_env', 'e2efold', 'e2efold_conda_env', 22 | 'spotrna2']): 23 | print(f'{pkg} not tested.') 24 | continue 25 | print(pkg) 26 | test_pfunc(pkg.lower()) 27 | 28 | -------------------------------------------------------------------------------- /tests/test_pk.py: -------------------------------------------------------------------------------- 1 | from arnie.pk_predictors import pk_predict, pk_predict_from_bpp 2 | from arnie.utils import prob_to_bpp, load_package_locations 3 | import numpy as np 4 | 5 | # TODO e2efold is stochastic? 6 | # TODO spotrna2 add in? 
7 | 8 | samiv_seq = "GGUCAUGAGUGCCAGCGUCAAGCCCCGGCUUGCUGGCCGGCAACCCUCCAACCGCGGUGGGGUGCCCCGGGUGAUGACCAGGUUGAGUAGCCGUGACGGCUACGCGGCAAGCGCGGGUC" 9 | samiv_struct = "((((....(.((((((....((.[[[[[)).)))))))(((..((((((..{{{{)).)))).)))]]]]]....))))..((((.(((((.......))))).))))....}}}}..." 10 | 11 | pk_res = {"hotknots": "((((((....(((((((......[[[[[..))))))).....(((((....{{{{{..)))))...]]]]]..))))))..((((.(((((((...))))))).))))...}}}}}...", 12 | "ipknot": "[[[[[[....[[[..........(((((((((((]]].((((.(((.((......)).))).)))).......]]]]]].......(((((((...)))))))..))))))).))))..", 13 | "knotty": "(((..[[[[[))).]].]]]...((((((((((((...((((.(((.((......)).))).))))((.(((....))).))....(((((((...))))))).)))))))).))))..", 14 | "spotrna": "(((((((....(((((...((.....[[.)))))))]](((.((((((((....)))).)))))))......))))))).......((((((((.))))))))(.........).....", 15 | "e2efold": ".....(............((...(.......(.............)..........)).)........(.(......(.(.(.([...).]).)...)....).).....)........", 16 | "pknots": "(((.......))).((.....))(((((((((((((((((((.((((((......)).))))))))...(((....))).)))...((((((.....)))))).)))))))).)))).."} 17 | 18 | # threshknot_theta_maxIter_buldge_helix 19 | # hungarian_theta_buldge_helix_exp_sig_0p_1p_ln_unpaired 20 | bpp_heuristics = {"threshknot_0.1_1_0_1": "...((......(...........[[[[[[[[[[[[)..((((.(((.((....())).))).))))[[.[[[..))]]].]]....(((((((...))))))).]]]]]]]].]]]]..", 21 | "threshknot_0.4_1_0_1": "...........................(((((((....(((......(........)......)))....................(((((((...)))))))..))))))).......", 22 | "threshknot_0.9_1_0_1": ".......................................................................................................................", 23 | "threshknot_0.1_1_0_3": ".......................((((((((((((...((((.(((............))).))))...(((....))).......(((((((...))))))).)))))))).))))..", 24 | "threshknot_0.1_5_0_1": 
"(..[[.....[[)........(.((((((((((((]].((((.(((.((....())).))).))))((.(((..]]))).))....(((((((...))))))).)))))))).)))).)", 25 | "hungarian_0.3_0_1_1_None_0.1_0.9_False_True": ".......................(((((((((((....(((..(((.((......)).)))..)))....................(((((((...)))))))..))))))).))))..", 26 | "hungarian_0.3_0_2_4_None_0.1_0.9_False_True": ".......................(((((((((((....((((.(((.((......)).))).))))....................(((((((...)))))))..))))))).))))..", 27 | "hungarian_0.3_0_1_1_None_0_1_False_False": ".......................(((((((((((....((((.(((.((......)).))).))))....................(((((((...)))))))..))))))).))))..", 28 | "hungarian_0.3_0_1_1_3_0.1_0.9_False_True": ".......................(((((((((((....(((..(((.((......)).)))..)))....................(((((((...)))))))..))))))).))))..", 29 | "hungarian_0.8_0_1_1_None_0.1_0.9_False_True": "......................................................................................((((((.....))))))................"} 30 | 31 | 32 | def test_pk(pkg): 33 | print("Testing", pkg) 34 | pred = pk_predict(samiv_seq, pkg) 35 | 36 | assert(pred == pk_res[pkg]) 37 | 38 | 39 | # def bpps and output expected 40 | bpp_file = "test_files/samiv_eternafold.prob" 41 | bpp = prob_to_bpp(bpp_file) 42 | 43 | 44 | def test_pk_from_bpp(): 45 | print("Testing threshknot") 46 | assert(bpp_heuristics["threshknot_0.1_1_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.1, max_iter=1, allowed_buldge_len=0, min_len_helix=1)) 47 | assert(bpp_heuristics["threshknot_0.4_1_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.4, max_iter=1, allowed_buldge_len=0, min_len_helix=1)) 48 | assert(bpp_heuristics["threshknot_0.9_1_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.9, max_iter=1, allowed_buldge_len=0, min_len_helix=1)) 49 | assert(bpp_heuristics["threshknot_0.1_1_0_3"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.1, max_iter=1, allowed_buldge_len=0, min_len_helix=3)) 50 | 
assert(bpp_heuristics["threshknot_0.1_5_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.1, max_iter=5, allowed_buldge_len=0, min_len_helix=1)) 51 | print("Testing hungarian") 52 | assert(bpp_heuristics["hungarian_0.3_0_1_1_None_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=0, min_len_helix=1, 53 | exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True)) 54 | assert(bpp_heuristics["hungarian_0.3_0_2_4_None_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=2, min_len_helix=4, 55 | exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True)) 56 | assert(bpp_heuristics["hungarian_0.3_0_1_1_None_0_1_False_False"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=0, min_len_helix=1, 57 | exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0, prob_to_1_threshold_prior=1, ln=False, add_p_unpaired=False)) 58 | assert(bpp_heuristics["hungarian_0.3_0_1_1_3_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=0, min_len_helix=1, 59 | exp=1, sigmoid_slope_factor=3, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True)) 60 | assert(bpp_heuristics["hungarian_0.8_0_1_1_None_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.8, allowed_buldge_len=0, min_len_helix=1, 61 | exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True)) 62 | 63 | 64 | if __name__ == '__main__': 65 | package_locs = load_package_locations() 66 | pk_predictors = ["spotrna", "e2efold", "hotknots", "ipknot", "knotty", "pknots"] 67 | for pkg in pk_predictors: 68 | if pkg not in package_locs: 69 | print("Warning:", pkg, "is not found 
def test_settings():
    """Check that every configured package location is an existing directory.

    The mapping returned by ``load_package_locations`` is walked entry by
    entry: each key is printed (useful when the test is run as a script) and
    the associated path must be a directory on disk.
    """
    for name, location in load_package_locations().items():
        print(name)
        assert os.path.isdir(location)
[".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..", False], 17 | [".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..", True], 18 | [".....((((((.....))))))....(((((([[[[[[[[[[[........))))))]]]]]]]]]]]........(((((((....))))))).....................", True], 19 | ["(((.[[[.(((...))).]]].)))", True], 20 | ["(((..[[[.(((...))))))]]]", True], 21 | ["([{AAA.A", True], 22 | [".....[[[[[[.....]]]]]]....[[[[[[[[[[[....]]]]]]]]]]].......(((((((((((.[[[[[)))))))))))[[[[[[[[[[[[....]]]]].]]]]]]]....]]]]].[[[[[[[[[[[[[....]]]]]]]]]]]]].....................", True], 23 | [".....((((((.....))))))....(((((((((((....)))))))))))........((((((((((.<<<<<))))))))))(((((((((((((....))))).))))))))...>>>>>.(((((((((((((....))))))))))))).....................", True], 24 | ["((.(.(.((.(.....).)).)....(((((((((((....)))))))))))......)((((((((((...{{{{.)))))))))).(((((((((((....))))).))))))...)[}}}}..[[[[[[[[[[[[[)...]]]]]]]]]]]]]...].................", True], 25 | ["(((((((([.{)][..((.{]).)(.}).})))))))", True], 26 | ["-(((((..(((((....)))))((((((((.((...-----((((((..(((((((..)))))))(((((({..[[[[[[[)))))))))))..).))).))))))))))))}.]]]]]]]...", True], 27 | ["((((({]a]]", True], 28 | [".....(.((.(.....).)).)....(.((..({(<.{.a.a{........(..(((((.(.[....).)))).)...)).).}).((.(}...}...)>)).A.A...).)..............(((((((((((.(....).)))))))))))............]........", True], 29 | ["..((...((.........))......(((((((....))))))).(((((((....))))))).(((((((....))))))).(((((((((....)))))))))..(((((((....)))))))..(((((((....)))))))...[[[[[[[......[[[[[[[[......[[.....)).....((..........]]......]]]]]]]]......................))..............]]]]]]].....(((((((....)))))))......................", True], 30 | 
["((((((((((((((((....)))))))((((.(((((((((((......)))))[[[[[[.)))))).))))(((((((((((((.((.(.....(......(((.(((((((((((.((((.(((((..]]]]]].))))).)))).))(((((((....)))))))(((((((((....)))))))(((((((....)))))))(((.(.((((((((((......)))))[[[[[[.))))).).)))))))))))))).)))..............)..).)).)))))))))))(((((((.(((((..]]]]]].))))).)))))))(((((((....))))))))))))))))))", True], 31 | ["(((....))) (((....)))", False], 32 | ["(((....))) (((....)))", True], 33 | ] 34 | 35 | success_expected_output = [ 36 | "......", 37 | "......", 38 | "(((((......)))))", 39 | "(((((......)))))", 40 | "(((((......)))))", 41 | "(((((......)))))", 42 | "(((((......)))))", 43 | "((((((((...........)).))))))", 44 | "((((((((...........)).))))))", 45 | "(((.((((((((..((((.(((((....)).)))..))))..))))((((...))))))))...)))", 46 | "(((.((((((((..((((.(((((....)).)))..))))..))))((((...))))))))...)))", 47 | ".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..", 48 | ".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..", 49 | ".....((((((.....))))))....(((((([[[[[[[[[[[........))))))]]]]]]]]]]]........(((((((....))))))).....................", 50 | "(((.(((.(((...))).))).)))", 51 | "(((..[[[.(((...))))))]]]", 52 | "([{AAA.A", 53 | ".....((((((.....))))))....(((((((((((....))))))))))).......(((((((((((.[[[[[)))))))))))((((((((((((....))))).)))))))....]]]]].(((((((((((((....))))))))))))).....................", 54 | ".....((((((.....))))))....(((((((((((....)))))))))))........((((((((((.[[[[[))))))))))(((((((((((((....))))).))))))))...]]]]].(((((((((((((....))))))))))))).....................", 55 | "((.(.(.((.(.....).)).)....(((((((((((....)))))))))))......)((((((((((...{{{{.)))))))))).(((((((((((....))))).))))))...)[}}}}..[[[[[[[[[[[[[)...]]]]]]]]]]]]]...].................", 56 | "(((((((([.{)](..[[.{)].](.}).})))))))", 57 
def test_structure_sanitization_success():
    """Round-trip every success case through the base-pair-list converters.

    Each entry of ``success_cases`` is a ``[dot_bracket, allow_pseudoknots]``
    pair. The structure is converted to a base-pair list and back to
    dot-bracket notation; the result must equal the entry at the same
    position in ``success_expected_output``.
    """
    for idx, (structure, allow_pk) in enumerate(success_cases):
        pairs = utils.convert_dotbracket_to_bp_list(
            structure, allow_pseudoknots=allow_pk)
        round_tripped = utils.convert_bp_list_to_dotbracket(
            pairs, seq_len=len(structure))
        assert round_tripped == success_expected_output[idx]
def test_structure_sanitization_failure():
    """Verify that malformed structures are rejected with the expected message.

    Each entry of ``failure_cases`` is a ``[dot_bracket, allow_pseudoknots]``
    pair that ``utils.convert_dotbracket_to_bp_list`` must refuse. The text of
    the raised exception has to equal the entry at the same position in
    ``failure_expected_output``.
    """
    # Guard against the two parallel tables drifting apart: without this, a
    # surplus expectation would be silently ignored and a missing one would
    # only surface as an IndexError part-way through the loop.
    assert len(failure_cases) == len(failure_expected_output)

    for (structure, allow_pk), expected in zip(failure_cases,
                                               failure_expected_output):
        with pytest.raises(Exception) as exc_info:
            # Only the raised error matters; the return value is not needed.
            utils.convert_dotbracket_to_bp_list(
                structure, allow_pseudoknots=allow_pk)
        # This check must stay outside the ``with`` block: any statement
        # placed after the raising call inside it would never execute.
        assert str(exc_info.value) == expected, (
            "unexpected error message for %r (allow_pseudoknots=%s)"
            % (structure, allow_pk))