├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── .nojekyll ├── README.md ├── _sidebar.md ├── assets │ └── pseudoknot.png ├── development │ └── README.md ├── index.html ├── setup │ ├── environment.md │ ├── install.md │ └── packages.md ├── sherlock │ ├── README.md │ ├── environment.md │ └── jobs.md └── usage │ ├── README.md │ ├── pseudoknots.md │ ├── structure_prediction.md │ └── utilities.md ├── example_arnie_file.txt ├── examples ├── data_for_examples │ └── ribologic_SI.txt └── start_here.ipynb ├── notebooks ├── IntroToArnie.ipynb └── README.md ├── parameter_files ├── contrafold.params.complementary ├── contrafold.params.noncomplementary ├── learntofold.contrafold.params ├── rna_andronescu2007.par ├── rna_langdon2018.par ├── rna_turner1999.par └── rna_turner2004.par ├── pyproject.toml ├── pytest.ini ├── scripts ├── score_pseudoacc_mea.py ├── write_bpp_matrices.py └── write_unpaired_vectors.py ├── src └── arnie │ ├── __init__.py │ ├── bpps.py │ ├── free_energy.py │ ├── mea │ ├── __init__.py │ ├── mea.py │ ├── mea_utils.py │ └── threshknot.py │ ├── mfe.py │ ├── mfe_bootstrap.py │ ├── pfunc.py │ ├── pk_predictors.py │ ├── sample_structures.py │ ├── utils.py │ └── viz.py └── tests ├── __init__.py ├── test_bpps.py ├── test_converters.py ├── test_evaluation_metrics.py ├── test_file_readers.py ├── test_files ├── samiv_eternafold.prob ├── seq.bpseq ├── seq.ct └── seq.prob ├── test_helix_getting_and_removing.py ├── test_linearpartition.py ├── test_pfunc.py ├── test_pk.py ├── test_sample_struct.py ├── test_settings.py ├── test_structure_handling.py └── test_vfold_versions.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python package to PyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | name: Build distribution package 8 | runs-on: "ubuntu-latest" 9 | 10 | steps: 11 | - name: Checkout source 12 | uses: actions/checkout@v4 13 | 14 | - name: 
Set up Python 3.12 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.12" 18 | 19 | - name: Install pypa/build 20 | run: python3 -m pip install build --user 21 | 22 | - name: Build a binary wheel and a source tarball 23 | run: python3 -m build 24 | 25 | - name: Store the distribution packages 26 | uses: actions/upload-artifact@v4 27 | with: 28 | name: release-distributions 29 | path: dist/ 30 | 31 | publish-to-pypi: 32 | name: Publish Python distribution to PyPI 33 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 34 | needs: 35 | - build 36 | runs-on: ubuntu-latest 37 | environment: 38 | name: pypi 39 | url: https://pypi.org/p/arnie 40 | permissions: 41 | id-token: write # IMPORTANT: mandatory for trusted publishing 42 | 43 | steps: 44 | - name: Download all the dists 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: release-distributions 48 | path: dist/ 49 | - name: Publish distribution to PyPI 50 | uses: pypa/gh-action-pypi-publish@release/v1 51 | 52 | github-release: 53 | name: >- 54 | Sign the Python distribution with Sigstore 55 | and upload them to GitHub Release 56 | needs: 57 | - publish-to-pypi 58 | runs-on: ubuntu-latest 59 | 60 | permissions: 61 | contents: write # IMPORTANT: mandatory for making GitHub Releases 62 | id-token: write # IMPORTANT: mandatory for sigstore 63 | 64 | steps: 65 | - name: Download all the dists 66 | uses: actions/download-artifact@v4 67 | with: 68 | name: release-distributions 69 | path: dist/ 70 | - name: Sign the dists with Sigstore 71 | uses: sigstore/gh-action-sigstore-python@v1.2.3 72 | with: 73 | inputs: >- 74 | ./dist/*.tar.gz 75 | ./dist/*.whl 76 | - name: Create GitHub Release 77 | env: 78 | GITHUB_TOKEN: ${{ github.token }} 79 | run: >- 80 | gh release create 81 | '${{ github.ref_name }}' 82 | --repo '${{ github.repository }}' 83 | --notes "" 84 | - name: Upload artifact signatures to GitHub Release 85 | env: 86 | GITHUB_TOKEN: ${{ github.token }} 87 | # 
Upload to GitHub Release using the `gh` CLI. 88 | # `dist/` contains the built packages, and the 89 | # sigstore-produced signatures and certificates. 90 | run: >- 91 | gh release upload 92 | '${{ github.ref_name }}' dist/** 93 | --repo '${{ github.repository }}' 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__/* 3 | .ipynb_checkpoints/* 4 | */.ipynb_checkpoints/* 5 | rna.ps 6 | *.arnie 7 | dist/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Leland Stanford Junior University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # arnie 2 | Python API to compute RNA energetics and do structure prediction across multiple secondary structure packages. 3 | 4 | ## Documentation 5 | [See our full docs.](https://daslab.github.io/arnie) 6 | 7 | ## Install 8 | `arnie` is [available on PyPI](https://pypi.org/project/arnie/). 9 | 10 | `pip install arnie` 11 | 12 | ## Repo Organization 13 | 14 | `src/arnie`: source code for the arnie package. 15 | 16 | `docs`: docsify-based markdown documentation for the arnie package. 17 | 18 | `tests`: unit tests 19 | 20 | `notebooks`: example jupyter notebooks with usage. 21 | 22 | `scripts`: scripts for processing sequences in batch. 23 | 24 | `parameter_files`: dir of various parameter files for packages, put here out of convenience. 25 | 26 | 27 | (c) 2024 Leland Stanford Jr University 28 | Authors: 29 | Hannah Wayment-Steele -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/.nojekyll -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # arnie 2 | Arnie is a Python API to compute RNA energetics and do structure prediction across multiple secondary structure packages. 3 | 4 | ## Install 5 | `arnie` is [available on PyPI](https://pypi.org/project/arnie/). 6 | 7 | `pip install arnie` 8 | 9 | ## Simple Setup 10 | Arnie works by delegating calls for structure predictions to various RNA prediction libraries. 
To use arnie we need to have these libraries installed, and we need to point to these their installed locations with environment variables. Here we will use [Eternafold](https://github.com/eternagame/Eternafold) which is simple to install via [Bioconda](https://bioconda.github.io/recipes/eternafold/README.html). This example assumes you have conda installed already; see the full [setup page](/setup/environment.md) for more details about setting up an arnie environment. 11 | 12 | 13 | ``` 14 | conda install -c bioconda eternafold 15 | export eternafold_PATH=/path/to/installed/location 16 | ``` 17 | 18 | ## Usage: 19 | 20 | See the [usage docs](/usage/structure_prediction) for example syntax. In brief, comparing across packages is simple. For computing base pairing probability matrices: 21 | 22 | ``` 23 | from arnie.bpps import bpps 24 | 25 | bpps_dict = {} 26 | my_sequence = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG' 27 | 28 | for pkg in ['vienna','nupack','RNAstructure','contrafold','RNAsoft']: 29 | bpps_dict[pkg] = bpps(my_sequence, package=pkg) 30 | ``` 31 | 32 | (c) 2024 [Das Lab](https://daslab.stanford.edu/), Leland Stanford Jr University -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Home](README.md) 4 | - Getting Started 5 | - [Installation](setup/install.md) 6 | - [Environment](setup/environment.md) 7 | - Using Arnie 8 | - [Basics](usage/README.md) 9 | - [Structure Prediction](usage/structure_prediction.md) 10 | - [Pseudoknot Prediction](usage/pseudoknots.md) 11 | - Arnie on Sherlock 12 | - [Environment](sherlock/environment.md) 13 | - [Jobs](sherlock/jobs.md) 14 | - [Contributing](development/README.md) -------------------------------------------------------------------------------- /docs/assets/pseudoknot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/assets/pseudoknot.png -------------------------------------------------------------------------------- /docs/development/README.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Installing via Github 4 | The `arnie` package source code is hosted on [Github](https://github.com/DasLab/arnie). You can clone the repo as below. 5 | 6 | ``` 7 | git clone https://github.com/DasLab/arnie.git 8 | ``` 9 | 10 | You can also use pip to install arnie from our Github repo: 11 | ``` 12 | pip install git+https://github.com/DasLab/arnie 13 | ``` 14 | This is particularly useful for testing new features internally before releasing on PyPI. 15 | 16 | ## Repo Organization 17 | 18 | `src/arnie`: source code for the arnie package. 19 | 20 | `docs`: docsify-based markdown documentation for the arnie package. 21 | 22 | `tests`: unit tests 23 | 24 | `notebooks`: example jupyter notebooks with usage. 25 | 26 | `scripts`: scripts for processing sequences in batch. 27 | 28 | `parameter_files`: dir of various parameter files for packages, put here out of convenience. 29 | 30 | 31 | 32 | ## Github Issues 33 | We use [Github issues](https://github.com/DasLab/arnie/issues) to coordinate development tasks and track feature development and bug fixes. If you run into problems while using `arnie`, please file an issue so that we can address the bug. Similarly, if you have a feature idea that could simplify your research, file an issue detailing your proposed feature. 34 | 35 | ## Package Testing 36 | Tests are located in the `tests` directory of the repo. We use the [pytest](https://docs.pytest.org/en/stable/) testing framework. Tests are run in the repo root directory. 
37 | 38 | To run all the tests, 39 | ``` 40 | pytest 41 | ``` 42 | To run a specific test, 43 | ``` 44 | pytest tests/test_structure_handling.py 45 | ``` 46 | If you add new features or fix a bug, make sure to update the tests appropriately. 47 | 48 | ## Package Distribution 49 | We distribute arnie via the [Python Package Index](https://pypi.org/). The DasLab has a [PyPI account](https://pypi.org/user/daslab/) for all our packages, with `arnie` available [here](https://pypi.org/project/arnie/) 50 | 51 | Arnie package release is automated via Github Actions. The [release workflow](https://github.com/DasLab/arnie/actions/workflows/release.yml) builds the package for distribution, publishes to PyPI and releases a Github release. The action is triggered on new git tag push. 52 | 53 | To push a new release, update the `pyproject.toml` version number as appropriate (we follow the [semantic versioning](https://semver.org/) standard). Next, define a matching git tag for the version number, and then push to Github. 54 | ``` 55 | git checkout master 56 | git tag -a v1.1.0 -m "Arnie Release v1.1.0" 57 | git push origin tag v1.1.0 58 | ``` -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |
6 |
7 | Unpaired bases in a loop structure may pair with nucleotides elsewhere in the RNA sequence. This type of pairing is impossible to represent with the `(`, `)`, and `.` characters of traditional dot bracket notation, so we introduce new characters to represent various levels of pseudoknot pairings. In order, Arnie uses `[`, `{`, `<`, and lower case alphabet characters (`abc...`) to represent opening pairs, and `]`, `}`, `>`, and upper case alphabet characters (`ABC...`) to represent closing pairs.
8 |
9 | Here is an example pseudoknotted structure in dot bracket notation utilizing the expanded character set `...(((..[[[.(((...))))))]]]...`.
10 |
11 | Many traditional structure prediction algorithms struggle with predicting pseudoknot structures, but there are a variety of approaches that can predict these complex folds. Arnie provides two main functions to predict pseudoknots: `pk_predict` and `pk_predict_from_bpp`.
12 |
13 | ## pk_predict
14 | `pk_predict` takes an input RNA sequence string and returns a predicted secondary structure string in dot bracket notation that may include pseudoknots. It's very similar to the `mfe` function, but supports a different set of predictor packages that focus on pseudoknot prediction.
15 |
16 | **Args:**
17 | ```
18 | seq (str): nucleic acid sequence, required
19 | predictor (str): the folding library to use
20 | ipknot options:
21 | model: one of ["LinearPartition-C","LinearPartition-V","Boltzmann","ViennaRNA","CONTRAfold","NUPACK"]
22 | t1: probability threshold level 1
23 | t2: probability threshold level 2
24 | refinement: number of refinement iterations
25 |
26 | hotknots options:
27 | model: one of ["CC","RE","DP"]
28 | param: one of ["parameters_CC06.txt","parameters_CC09.txt","parameters_DP03.txt","parameters_DP09.txt"]
29 |
30 | spotrna options:
31 | cpu: number cpu threads
32 | ```
33 |
34 | **Returns:**
35 | ```
36 | A string in dot bracket notation representing the predicted secondary structure of the provided sequence, potentially including pseudoknots.
37 | ```
38 |
39 | **Example:**
40 | ```
41 | pk_predict("GUAUCAAAAAAGAUACGCCGUAUGCUAAUAUGUAUCUAUACUUGCUCUACAGGUUGAG", "knotty")
42 |
43 | '..........(((((([[[[[[.[[...[[[))))))]]]...]]..]]].]]]....'
44 | ```
45 |
46 | **Supported packages:**
47 | - `hotknots`
48 | - `ipknot`
49 | - `knotty`
50 | - `spotrna`
51 | - `spotrna2`
52 | - `e2efold`
53 | - `pknots`
54 | - `nupack`
55 |
56 | ## pk_predict_from_bpp
57 | `pk_predict_from_bpp` takes a different approach to pseudoknot prediction. Rather than use dedicated pseudoknot prediction packages, `pk_predict_from_bpp` uses post-processing algorithms that can predict likely pseudoknots based on a sequence's predicted base pair probability matrix. This allows us to examine sequences for predicted pseudoknots with traditional predictive models that don't support pseudoknots by default.
58 |
59 | `pk_predict_from_bpp` provides two processing algorithms, [`threshknot`](https://arxiv.org/abs/1912.12796) and [`hungarian`](https://en.wikipedia.org/wiki/Hungarian_algorithm).
60 |
61 | **Args:**
62 | ```
63 | bpp (array): base pair probability matrix, required
64 | heuristic (str): the pk prediction algorithm to use; either "hungarian" or "threshknot"
65 | threshknot options:
66 | theta
67 | max_iter
68 | allowed_buldge_len
69 | min_len_helix
70 |
71 | hungarian options:
72 | add_p_unpaired
73 | theta (aka prob_to_0_threshold_post)
74 | prob_to_0_threshold_prior
75 | prob_to_1_threshold_prior
76 | exp
77 | sigmoid_slope_factor
78 | ln
79 | allowed_buldge_len
80 | min_len_helix
81 | ```
82 |
83 | **Returns:**
84 | ```
85 | A string in dot bracket notation representing the predicted secondary structure of the provided sequence, potentially including pseudoknots.
86 | ```
87 |
88 | **Example:**
89 | ```
90 | bpp = bpps("GUAUCAAAAAAGAUACGCCGUAUGCUAAUAUGUAGGCGCUAUACUUGCUCUACACCGGCGGUUGAG", package="eternafold")
91 | pk_predict_from_bpp(bpp)
92 |
93 | '(((((......)))))..........................................'
94 | ```
95 |
96 | **Supported packages:**
97 | - `eternafold`
98 | - `contrafold`
99 | - `vienna`
100 | - `nupack`
101 | - `rnasoft`
102 | - `rnastructure`
103 | - `vfold`
104 |
105 |
--------------------------------------------------------------------------------
/docs/usage/structure_prediction.md:
--------------------------------------------------------------------------------
1 |
2 | ## Structure Prediction
3 |
4 | ## MFE
5 | The `mfe` function generates a "minimum free energy" structure prediction with the selected package. The minimum free energy prediction is the secondary structure calculated to have the lowest free energy value. In theory, the lower the free energy, the more likely the structure is to form. Not all predictors support free energy-based estimates (although many do).
6 |
7 | Note: `mfe` operates differently than [`mea`](#mea). That said, contrafold's default structure prediction is an MEA structure, not MFE. When using `mfe`, calling contrafold returns the default MEA structure unless the `--viterbi` flag is used, which will use the viterbi (MFE) algorithm in contrafold.
8 |
9 |
10 | **Args:**
11 | ```
12 | seq (str): nucleic acid sequence, required
13 | package (str): the folding library to use
14 | T (float): temperature (Celsius)
15 | constraint (str): structure constraints
16 | motif (str): argument to vienna motif
17 | linear (bool): call LinearFold to estimate MFE in Vienna or Contrafold
18 | return_dG_MFE (bool): also return dG(MFE) (specific to linearfold)
19 | dangles (bool): dangles or not (specific to linearfold)
20 | noncanonical (bool): include noncanonical pairs or not (specific to contrafold, RNAstructure (Cyclefold))
21 | param_file (str): path to specific thermodynamic parameter file (specific to contrafold, eternafold)
22 | coaxial (bool): coaxial stacking or not (specific to rnastructure)
23 | viterbi (bool): use the viterbi algorithm for mfe calculation (specific to contrafold)
24 | pseudo (bool): if True, will predict pseudoknots
25 | shape_signal (list): list of normalized SHAPE reactivities, with negative values indicating no signal (specific to rnastructure)
26 | dms_signal (list): list of normalized DMS reactivities, with negative values indicating no signal (specific to rnastructure)
27 | shape_file (str): path to file containing shape_signal (specific to rnastructure)
28 | dms_file (str): path to file containing dms_signal (specific to rnastructure)
29 | ```
30 |
31 | **Returns:**
32 | ```
33 | A string in dot bracket notation representing the calculated MFE structure of the provided sequence.
34 | ```
35 |
36 | **Example:**
37 | ```
38 | mfe("GUAUCAAAAAAGAUAC")
39 | '(((((......)))))'
40 | ```
41 |
42 | **Supported packages:**
43 | - `eternafold`
44 | - `contrafold`
45 | - `vienna`
46 | - `rnastructure`
47 | - `linearfold`
48 |
49 | ## BPPS
50 | The `bpps` function calculates the "base pairing probability matrix" with the selected package. The base pairing probability matrix is an NxN matrix (where N is the length of the RNA sequence), with the value at position `i,j` representing the probability of nucleotide `i` pairing with nucleotide `j`.
51 |
52 | **Args:**
53 | ```
54 | sequence (str): nucleic acid sequence, required
55 | package (str): the folding library to use
56 | constraint (str): structure constraint [vienna, contrafold, rnastructure]
57 | linear (bool): call LinearPartition to estimate Z in Vienna or Contrafold
58 |
59 | motif (str): argument to vienna motif
60 | pseudo (bool): (NUPACK only) include pseudoknot calculation
61 | dangles (bool): dangles or not, specifiable for vienna, nupack
62 | dna (bool): (NUPACK only) use SantaLucia 1998 parameters for DNA
63 | coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold
64 | noncanonical (bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold))
65 | beam size (int): Beam size for LinearPartition base pair calculation.
66 | DEBUG (bool): Output command-line calls to packages.
67 | threshknot (bool): calls threshknot to predict pseudoknots (for contrafold with LinearPartition)
68 | shape_signal (list): list of normalized SHAPE reactivities, with negative values indicating no signal (specific to rnastructure)
69 | dms_signal (list): list of normalized DMS reactivities, with negative values indicating no signal (specific to rnastructure)
70 | shape_file (str): path to file containing shape_signal (specific to rnastructure)
71 | dms_file (str): path to file containing dms_signal (specific to rnastructure)
72 | ```
73 |
74 | **Returns:**
75 | ```
76 | array: NxN matrix of base pair probabilities
77 | ```
78 |
79 | **Example:**
80 | ```
81 | bpps("GUAUCAAAAAAGAUAC")
82 | array([[0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 3.77178e-04,
83 | 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
84 | 0.00000e+00, 0.00000e+00, 0.00000e+00, 4.39771e-04, 0.00000e+00,
85 | 8.24776e-01],
86 | [0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
87 | 0.00000e+00, 1.69534e-04, 2.01963e-04, 1.93469e-04, 2.05658e-04,
88 | 2.01099e-04, 1.37709e-04, 5.21924e-04, 0.00000e+00, 8.42528e-01,
89 | 0.00000e+00],
90 | ...
91 | ```
92 |
93 | **Supported packages:**
94 | - `eternafold`
95 | - `contrafold`
96 | - `vienna`
97 | - `nupack`
98 | - `rnasoft`
99 | - `rnastructure`
100 | - `vfold`
--------------------------------------------------------------------------------
/docs/usage/utilities.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/usage/utilities.md
--------------------------------------------------------------------------------
/example_arnie_file.txt:
--------------------------------------------------------------------------------
1 | # paths to local installations of packages. If package is not installed, leave as None
2 | # Replace paths below with paths to your installations.
3 | # NB: .gitignore file ignores *.arnie files. Name it as such if you don't want your local path file
4 | # included with your git repo.
5 |
6 | rnastructure: /path/to/RNAstructure/exe
7 | rnasoft: /path/to/MultiRNAFold
8 | contrafold_2: /path/to/contrafold-se/src
9 | eternafold: /path/to/eternafold/src/
10 | vfold: /path/to/Vfold2D
11 | nupack: /path/to/nupack3.2.2/build/bin
12 |
13 | # for a Mac installed binary:
14 | vienna_2: /usr/local/bin
15 | # for path to a vienna build:
16 | vienna_2: /path/to/ViennaRNA-2.4.10/src/bin
17 | vienna_1: /path/to/ViennaRNA-1.8.5/bin
18 |
19 | # for linear partition
20 | linearfold: /path/to/LinearFold/bin
21 | linearpartition: /path/to/LinearPartition/bin
22 |
23 | # for PK predictors
24 | hotknots: /path/to/HotKnots_v2.0/bin
25 | ipknot: /path/to/ipknot/build
26 | knotty: /path/to/Knotty
27 | pknots: /path/to/PKNOTS/bin
28 | spotrna: /path/to/SPOT-RNA
29 | spotrna_conda_env: /path/to/miniconda3/envs/spotrna/bin
30 | spotrna2: /path/to/SPOT-RNA2
31 | e2efold: /path/to/e2efold/e2efold_productive
32 | e2efold_conda_env: /path/to/miniconda3/envs/e2efold/bin
33 |
34 | #TMP: location for tmp files for packages. Update to where you want your tmp files stored.
35 | TMP: /tmp
36 |
--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Notebooks
2 | This directory houses various notebooks demonstrating key Arnie functionality.
3 |
4 | - [Basic Introduction / Install](https://github.com/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb)
5 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb)
--------------------------------------------------------------------------------
/parameter_files/contrafold.params.complementary:
--------------------------------------------------------------------------------
1 | base_pair_AA 0
2 | base_pair_AC 0
3 | base_pair_AG 0
4 | base_pair_AU 0.59791199
5 | base_pair_CC 0
6 | base_pair_CG 1.544290641
7 | base_pair_CU 0
8 | base_pair_GG 0
9 | base_pair_GU -0.01304754992
10 | base_pair_UU 0
11 | terminal_mismatch_AAAA 0
12 | terminal_mismatch_AAAC 0
13 | terminal_mismatch_AAAG 0
14 | terminal_mismatch_AAAU 0
15 | terminal_mismatch_AACA 0
16 | terminal_mismatch_AACC 0
17 | terminal_mismatch_AACG 0
18 | terminal_mismatch_AACU 0
19 | terminal_mismatch_AAGA 0
20 | terminal_mismatch_AAGC 0
21 | terminal_mismatch_AAGG 0
22 | terminal_mismatch_AAGU 0
23 | terminal_mismatch_AAUA 0
24 | terminal_mismatch_AAUC 0
25 | terminal_mismatch_AAUG 0
26 | terminal_mismatch_AAUU 0
27 | terminal_mismatch_ACAA 0
28 | terminal_mismatch_ACAC 0
29 | terminal_mismatch_ACAG 0
30 | terminal_mismatch_ACAU 0
31 | terminal_mismatch_ACCA 0
32 | terminal_mismatch_ACCC 0
33 | terminal_mismatch_ACCG 0
34 | terminal_mismatch_ACCU 0
35 | terminal_mismatch_ACGA 0
36 | terminal_mismatch_ACGC 0
37 | terminal_mismatch_ACGG 0
38 | terminal_mismatch_ACGU 0
39 | terminal_mismatch_ACUA 0
40 | terminal_mismatch_ACUC 0
41 | terminal_mismatch_ACUG 0
42 | terminal_mismatch_ACUU 0
43 | terminal_mismatch_AGAA 0
44 | terminal_mismatch_AGAC 0
45 | terminal_mismatch_AGAG 0
46 | terminal_mismatch_AGAU 0
47 | terminal_mismatch_AGCA 0
48 | terminal_mismatch_AGCC 0
49 | terminal_mismatch_AGCG 0
50 | terminal_mismatch_AGCU 0
51 | terminal_mismatch_AGGA 0
52 | terminal_mismatch_AGGC 0
53 | terminal_mismatch_AGGG 0
54 | terminal_mismatch_AGGU 0
55 | terminal_mismatch_AGUA 0
56 | terminal_mismatch_AGUC 0
57 | terminal_mismatch_AGUG 0
58 | terminal_mismatch_AGUU 0
59 | terminal_mismatch_AUAA -0.184546064
60 | terminal_mismatch_AUAC -0.1181844187
61 | terminal_mismatch_AUAG -0.4461469607
62 | terminal_mismatch_AUAU -0.6175254495
63 | terminal_mismatch_AUCA 0.004788458708
64 | terminal_mismatch_AUCC 0.08319395146
65 | terminal_mismatch_AUCG -0.2249479995
66 | terminal_mismatch_AUCU -0.3981327204
67 | terminal_mismatch_AUGA 0.5191110288
68 | terminal_mismatch_AUGC -0.3524119307
69 | terminal_mismatch_AUGG -0.4056429433
70 | terminal_mismatch_AUGU -0.7733932162
71 | terminal_mismatch_AUUA -0.01574403519
72 | terminal_mismatch_AUUC 0.268570042
73 | terminal_mismatch_AUUG -0.0934388741
74 | terminal_mismatch_AUUU 0.3373711531
75 | terminal_mismatch_CAAA 0
76 | terminal_mismatch_CAAC 0
77 | terminal_mismatch_CAAG 0
78 | terminal_mismatch_CAAU 0
79 | terminal_mismatch_CACA 0
80 | terminal_mismatch_CACC 0
81 | terminal_mismatch_CACG 0
82 | terminal_mismatch_CACU 0
83 | terminal_mismatch_CAGA 0
84 | terminal_mismatch_CAGC 0
85 | terminal_mismatch_CAGG 0
86 | terminal_mismatch_CAGU 0
87 | terminal_mismatch_CAUA 0
88 | terminal_mismatch_CAUC 0
89 | terminal_mismatch_CAUG 0
90 | terminal_mismatch_CAUU 0
91 | terminal_mismatch_CCAA 0
92 | terminal_mismatch_CCAC 0
93 | terminal_mismatch_CCAG 0
94 | terminal_mismatch_CCAU 0
95 | terminal_mismatch_CCCA 0
96 | terminal_mismatch_CCCC 0
97 | terminal_mismatch_CCCG 0
98 | terminal_mismatch_CCCU 0
99 | terminal_mismatch_CCGA 0
100 | terminal_mismatch_CCGC 0
101 | terminal_mismatch_CCGG 0
102 | terminal_mismatch_CCGU 0
103 | terminal_mismatch_CCUA 0
104 | terminal_mismatch_CCUC 0
105 | terminal_mismatch_CCUG 0
106 | terminal_mismatch_CCUU 0
107 | terminal_mismatch_CGAA 0.08386423535
108 | terminal_mismatch_CGAC -0.2520716816
109 | terminal_mismatch_CGAG -0.6711841881
110 | terminal_mismatch_CGAU -0.3816350028
111 | terminal_mismatch_CGCA 0.1117852189
112 | terminal_mismatch_CGCC -0.1704393624
113 | terminal_mismatch_CGCG -0.2179987732
114 | terminal_mismatch_CGCU -0.459267635
115 | terminal_mismatch_CGGA 0.8520640313
116 | terminal_mismatch_CGGC -0.9332488517
117 | terminal_mismatch_CGGG -0.3289551692
118 | terminal_mismatch_CGGU -0.7778822056
119 | terminal_mismatch_CGUA -0.2422339958
120 | terminal_mismatch_CGUC -0.03780509247
121 | terminal_mismatch_CGUG -0.4322334143
122 | terminal_mismatch_CGUU -0.2419976114
123 | terminal_mismatch_CUAA 0
124 | terminal_mismatch_CUAC 0
125 | terminal_mismatch_CUAG 0
126 | terminal_mismatch_CUAU 0
127 | terminal_mismatch_CUCA 0
128 | terminal_mismatch_CUCC 0
129 | terminal_mismatch_CUCG 0
130 | terminal_mismatch_CUCU 0
131 | terminal_mismatch_CUGA 0
132 | terminal_mismatch_CUGC 0
133 | terminal_mismatch_CUGG 0
134 | terminal_mismatch_CUGU 0
135 | terminal_mismatch_CUUA 0
136 | terminal_mismatch_CUUC 0
137 | terminal_mismatch_CUUG 0
138 | terminal_mismatch_CUUU 0
139 | terminal_mismatch_GAAA 0
140 | terminal_mismatch_GAAC 0
141 | terminal_mismatch_GAAG 0
142 | terminal_mismatch_GAAU 0
143 | terminal_mismatch_GACA 0
144 | terminal_mismatch_GACC 0
145 | terminal_mismatch_GACG 0
146 | terminal_mismatch_GACU 0
147 | terminal_mismatch_GAGA 0
148 | terminal_mismatch_GAGC 0
149 | terminal_mismatch_GAGG 0
150 | terminal_mismatch_GAGU 0
151 | terminal_mismatch_GAUA 0
152 | terminal_mismatch_GAUC 0
153 | terminal_mismatch_GAUG 0
154 | terminal_mismatch_GAUU 0
155 | terminal_mismatch_GCAA -0.1703136025
156 | terminal_mismatch_GCAC -0.09154056357
157 | terminal_mismatch_GCAG -0.2522413002
158 | terminal_mismatch_GCAU -0.8520314799
159 | terminal_mismatch_GCCA 0.04763224188
160 | terminal_mismatch_GCCC -0.2428654283
161 | terminal_mismatch_GCCG -0.2079275061
162 | terminal_mismatch_GCCU -0.1874270053
163 | terminal_mismatch_GCGA 0.6540033983
164 | terminal_mismatch_GCGC -0.7823988605
165 | terminal_mismatch_GCGG 0.1995898255
166 | terminal_mismatch_GCGU -0.4432169392
167 | terminal_mismatch_GCUA -0.1736921762
168 | terminal_mismatch_GCUC 0.288494362
169 | terminal_mismatch_GCUG -0.01638238057
170 | terminal_mismatch_GCUU 0.6757988971
171 | terminal_mismatch_GGAA 0
172 | terminal_mismatch_GGAC 0
173 | terminal_mismatch_GGAG 0
174 | terminal_mismatch_GGAU 0
175 | terminal_mismatch_GGCA 0
176 | terminal_mismatch_GGCC 0
177 | terminal_mismatch_GGCG 0
178 | terminal_mismatch_GGCU 0
179 | terminal_mismatch_GGGA 0
180 | terminal_mismatch_GGGC 0
181 | terminal_mismatch_GGGG 0
182 | terminal_mismatch_GGGU 0
183 | terminal_mismatch_GGUA 0
184 | terminal_mismatch_GGUC 0
185 | terminal_mismatch_GGUG 0
186 | terminal_mismatch_GGUU 0
187 | terminal_mismatch_GUAA -0.4871607613
188 | terminal_mismatch_GUAC 0.1105031953
189 | terminal_mismatch_GUAG 0.363373916
190 | terminal_mismatch_GUAU -0.6193199348
191 | terminal_mismatch_GUCA 0.3451056056
192 | terminal_mismatch_GUCC 0.0314944976
193 | terminal_mismatch_GUCG -0.3799172956
194 | terminal_mismatch_GUCU -0.03222973182
195 | terminal_mismatch_GUGA 0.4948638637
196 | terminal_mismatch_GUGC -0.2821952552
197 | terminal_mismatch_GUGG -0.2702227211
198 | terminal_mismatch_GUGU -0.06658395291
199 | terminal_mismatch_GUUA -0.4306154451
200 | terminal_mismatch_GUUC -0.09497863465
201 | terminal_mismatch_GUUG -0.3130794485
202 | terminal_mismatch_GUUU -0.2283242981
203 | terminal_mismatch_UAAA 0.0115363879
204 | terminal_mismatch_UAAC -0.3923408221
205 | terminal_mismatch_UAAG 0.05661063599
206 | terminal_mismatch_UAAU -0.1251485388
207 | terminal_mismatch_UACA -0.06545074758
208 | terminal_mismatch_UACC -0.3167200568
209 | terminal_mismatch_UACG 0.002258383981
210 | terminal_mismatch_UACU -0.422217724
211 | terminal_mismatch_UAGA 0.5458416646
212 | terminal_mismatch_UAGC -0.2085887954
213 | terminal_mismatch_UAGG -0.1971766062
214 | terminal_mismatch_UAGU -0.4722410132
215 | terminal_mismatch_UAUA -0.1779642496
216 | terminal_mismatch_UAUC 0.1643454344
217 | terminal_mismatch_UAUG -0.5005617032
218 | terminal_mismatch_UAUU 0.1333867679
219 | terminal_mismatch_UCAA 0
220 | terminal_mismatch_UCAC 0
221 | terminal_mismatch_UCAG 0
222 | terminal_mismatch_UCAU 0
223 | terminal_mismatch_UCCA 0
224 | terminal_mismatch_UCCC 0
225 | terminal_mismatch_UCCG 0
226 | terminal_mismatch_UCCU 0
227 | terminal_mismatch_UCGA 0
228 | terminal_mismatch_UCGC 0
229 | terminal_mismatch_UCGG 0
230 | terminal_mismatch_UCGU 0
231 | terminal_mismatch_UCUA 0
232 | terminal_mismatch_UCUC 0
233 | terminal_mismatch_UCUG 0
234 | terminal_mismatch_UCUU 0
235 | terminal_mismatch_UGAA 0.1218741278
236 | terminal_mismatch_UGAC 0.1990260141
237 | terminal_mismatch_UGAG 0.04681893928
238 | terminal_mismatch_UGAU 0.3256264491
239 | terminal_mismatch_UGCA 0.1186812326
240 | terminal_mismatch_UGCC -0.1851065102
241 | terminal_mismatch_UGCG -0.04311512683
242 | terminal_mismatch_UGCU -0.6150608139
243 | terminal_mismatch_UGGA 0.754933218
244 | terminal_mismatch_UGGC -0.3150708483
245 | terminal_mismatch_UGGG 0.1569582926
246 | terminal_mismatch_UGGU -0.514970007
247 | terminal_mismatch_UGUA -0.2926246029
248 | terminal_mismatch_UGUC 0.1373068149
249 | terminal_mismatch_UGUG -0.05422333363
250 | terminal_mismatch_UGUU 0.03086776921
251 | terminal_mismatch_UUAA 0
252 | terminal_mismatch_UUAC 0
253 | terminal_mismatch_UUAG 0
254 | terminal_mismatch_UUAU 0
255 | terminal_mismatch_UUCA 0
256 | terminal_mismatch_UUCC 0
257 | terminal_mismatch_UUCG 0
258 | terminal_mismatch_UUCU 0
259 | terminal_mismatch_UUGA 0
260 | terminal_mismatch_UUGC 0
261 | terminal_mismatch_UUGG 0
262 | terminal_mismatch_UUGU 0
263 | terminal_mismatch_UUUA 0
264 | terminal_mismatch_UUUC 0
265 | terminal_mismatch_UUUG 0
266 | terminal_mismatch_UUUU 0
267 | hairpin_length_at_least_0 -5.993180158
268 | hairpin_length_at_least_1 -3.108105762
269 | hairpin_length_at_least_2 0.4168976347
270 | hairpin_length_at_least_3 2.205419066
271 | hairpin_length_at_least_4 1.926749692
272 | hairpin_length_at_least_5 -0.5873245329
273 | hairpin_length_at_least_6 -0.0827571778
274 | hairpin_length_at_least_7 0.5783889844
275 | hairpin_length_at_least_8 -0.7220883372
276 | hairpin_length_at_least_9 -0.1725874624
277 | hairpin_length_at_least_10 -0.3025089867
278 | hairpin_length_at_least_11 -0.0296315939
279 | hairpin_length_at_least_12 -0.9268995948
280 | hairpin_length_at_least_13 -0.03157753978
281 | hairpin_length_at_least_14 -0.1022472101
282 | hairpin_length_at_least_15 0.1901407346
283 | hairpin_length_at_least_16 -0.09280909826
284 | hairpin_length_at_least_17 0.1690448408
285 | hairpin_length_at_least_18 -0.08172566471
286 | hairpin_length_at_least_19 -0.3445939031
287 | hairpin_length_at_least_20 -0.109150294
288 | hairpin_length_at_least_21 -0.2903523693
289 | hairpin_length_at_least_22 -0.3393713667
290 | hairpin_length_at_least_23 -0.1915364117
291 | hairpin_length_at_least_24 -0.05019209379
292 | hairpin_length_at_least_25 -0.03874620924
293 | hairpin_length_at_least_26 0.04751470752
294 | hairpin_length_at_least_27 0.06744321926
295 | hairpin_length_at_least_28 0.09721875726
296 | hairpin_length_at_least_29 0.1673131733
297 | hairpin_length_at_least_30 0.2329937249
298 | internal_explicit_1_1 -0.1754591076
299 | internal_explicit_1_2 0.03083787104
300 | internal_explicit_1_3 -0.171565435
301 | internal_explicit_1_4 -0.2294680983
302 | internal_explicit_2_2 -0.1304072693
303 | internal_explicit_2_3 -0.07730329553
304 | internal_explicit_2_4 0.2782767264
305 | internal_explicit_3_3 -0.02898949617
306 | internal_explicit_3_4 0.3112350694
307 | internal_explicit_4_4 -0.3226348245
308 | bulge_length_at_least_1 -2.399548472
309 | bulge_length_at_least_2 -0.8945183117
310 | bulge_length_at_least_3 -0.9088550909
311 | bulge_length_at_least_4 -0.8412474755
312 | bulge_length_at_least_5 -0.4365479343
313 | bulge_length_at_least_6 -0.5699187801
314 | bulge_length_at_least_7 0.2002834224
315 | bulge_length_at_least_8 0.7538761358
316 | bulge_length_at_least_9 -0.6045045455
317 | bulge_length_at_least_10 -0.7200948098
318 | bulge_length_at_least_11 -0.5136721921
319 | bulge_length_at_least_12 -0.3614726679
320 | bulge_length_at_least_13 -0.2614454392
321 | bulge_length_at_least_14 -0.1593926893
322 | bulge_length_at_least_15 -0.08624668281
323 | bulge_length_at_least_16 -0.03107090996
324 | bulge_length_at_least_17 -0.01097222032
325 | bulge_length_at_least_18 0.03001220283
326 | bulge_length_at_least_19 0.04759123789
327 | bulge_length_at_least_20 -0.04296172065
328 | bulge_length_at_least_21 -0.01791899662
329 | bulge_length_at_least_22 -0.07800551522
330 | bulge_length_at_least_23 -0.0709932643
331 | bulge_length_at_least_24 -0.05767952896
332 | bulge_length_at_least_25 -0.04633794681
333 | bulge_length_at_least_26 -0.03559420456
334 | bulge_length_at_least_27 -0.02674934394
335 | bulge_length_at_least_28 -0.01818957972
336 | bulge_length_at_least_29 -0.01052300732
337 | bulge_length_at_least_30 -0.005153626846
338 | internal_length_at_least_2 -0.429061443
339 | internal_length_at_least_3 -0.3532111501
340 | internal_length_at_least_4 -0.3963797535
341 | internal_length_at_least_5 -0.3111199175
342 | internal_length_at_least_6 -0.2551945472
343 | internal_length_at_least_7 -0.05149116898
344 | internal_length_at_least_8 -0.04319002407
345 | internal_length_at_least_9 0.001985489485
346 | internal_length_at_least_10 -0.1761513136
347 | internal_length_at_least_11 -0.2639686207
348 | internal_length_at_least_12 -0.3460613577
349 | internal_length_at_least_13 -0.2926603079
350 | internal_length_at_least_14 -0.03624250307
351 | internal_length_at_least_15 -0.1199953761
352 | internal_length_at_least_16 -0.04354771926
353 | internal_length_at_least_17 -0.08209293135
354 | internal_length_at_least_18 -0.007113226038
355 | internal_length_at_least_19 0.02354824852
356 | internal_length_at_least_20 0.03066973571
357 | internal_length_at_least_21 -0.06618241094
358 | internal_length_at_least_22 -0.1316092383
359 | internal_length_at_least_23 -0.1407995514
360 | internal_length_at_least_24 -0.06600291862
361 | internal_length_at_least_25 -0.07779204744
362 | internal_length_at_least_26 -0.05084201265
363 | internal_length_at_least_27 -0.04139875601
364 | internal_length_at_least_28 0.003276583405
365 | internal_length_at_least_29 0.00592458284
366 | internal_length_at_least_30 0.006875738004
367 | internal_symmetric_length_at_least_1 -0.5467082599
368 | internal_symmetric_length_at_least_2 -0.3854701647
369 | internal_symmetric_length_at_least_3 -0.2588466401
370 | internal_symmetric_length_at_least_4 -0.2340836745
371 | internal_symmetric_length_at_least_5 0.1450577765
372 | internal_symmetric_length_at_least_6 -0.6562932515
373 | internal_symmetric_length_at_least_7 -0.3021088369
374 | internal_symmetric_length_at_least_8 -0.03032275267
375 | internal_symmetric_length_at_least_9 -0.3517944058
376 | internal_symmetric_length_at_least_10 -0.2159132506
377 | internal_symmetric_length_at_least_11 -0.1228270454
378 | internal_symmetric_length_at_least_12 -0.1552208595
379 | internal_symmetric_length_at_least_13 -0.08541120743
380 | internal_symmetric_length_at_least_14 -0.04592109799
381 | internal_symmetric_length_at_least_15 -0.02232234236
382 | internal_asymmetry_at_least_1 -2.105646719
383 | internal_asymmetry_at_least_2 -0.5520140431
384 | internal_asymmetry_at_least_3 -0.577070767
385 | internal_asymmetry_at_least_4 -0.6136667847
386 | internal_asymmetry_at_least_5 -0.3057156841
387 | internal_asymmetry_at_least_6 -0.1155052001
388 | internal_asymmetry_at_least_7 -0.2105612231
389 | internal_asymmetry_at_least_8 -0.314574313
390 | internal_asymmetry_at_least_9 -0.3148961681
391 | internal_asymmetry_at_least_10 -0.09018189492
392 | internal_asymmetry_at_least_11 -0.2200026794
393 | internal_asymmetry_at_least_12 -0.1406483243
394 | internal_asymmetry_at_least_13 -0.2162411259
395 | internal_asymmetry_at_least_14 -0.1725531435
396 | internal_asymmetry_at_least_15 -0.1558911866
397 | internal_asymmetry_at_least_16 -0.1040858663
398 | internal_asymmetry_at_least_17 -0.06967684228
399 | internal_asymmetry_at_least_18 -0.04105977494
400 | internal_asymmetry_at_least_19 -0.01570624316
401 | internal_asymmetry_at_least_20 0.01382000639
402 | internal_asymmetry_at_least_21 0.04131988563
403 | internal_asymmetry_at_least_22 0.0359418595
404 | internal_asymmetry_at_least_23 0.02822186282
405 | internal_asymmetry_at_least_24 0.01636585874
406 | internal_asymmetry_at_least_25 0.02550056175
407 | internal_asymmetry_at_least_26 0.03348032793
408 | internal_asymmetry_at_least_27 0.03971924412
409 | internal_asymmetry_at_least_28 -0.002545113932
410 | bulge_0x1_nucleotides_A -0.1216861662
411 | bulge_0x1_nucleotides_C -0.07111241127
412 | bulge_0x1_nucleotides_G 0.008947026647
413 | bulge_0x1_nucleotides_U -0.002685763742
414 | internal_1x1_nucleotides_AA 0.2944404686
415 | internal_1x1_nucleotides_AC 0.08641360967
416 | internal_1x1_nucleotides_AG -0.3664197228
417 | internal_1x1_nucleotides_AU -0.2053107048
418 | internal_1x1_nucleotides_CC -0.1582543624
419 | internal_1x1_nucleotides_CG 0.4175273724
420 | internal_1x1_nucleotides_CU 0.1368762582
421 | internal_1x1_nucleotides_GG -0.1193514754
422 | internal_1x1_nucleotides_GU -0.4188101413
423 | internal_1x1_nucleotides_UU 0.147140653
424 | helix_stacking_AAAA 0
425 | helix_stacking_AAAC 0
426 | helix_stacking_AAAG 0
427 | helix_stacking_AAAU 0
428 | helix_stacking_AACA 0
429 | helix_stacking_AACC 0
430 | helix_stacking_AACG 0
431 | helix_stacking_AACU 0
432 | helix_stacking_AAGA 0
433 | helix_stacking_AAGC 0
434 | helix_stacking_AAGG 0
435 | helix_stacking_AAGU 0
436 | helix_stacking_AAUA 0
437 | helix_stacking_AAUC 0
438 | helix_stacking_AAUG 0
439 | helix_stacking_AAUU 0
440 | helix_stacking_ACAC 0
441 | helix_stacking_ACAG 0
442 | helix_stacking_ACAU 0
443 | helix_stacking_ACCA 0
444 | helix_stacking_ACCC 0
445 | helix_stacking_ACCG 0
446 | helix_stacking_ACCU 0
447 | helix_stacking_ACGA 0
448 | helix_stacking_ACGC 0
449 | helix_stacking_ACGG 0
450 | helix_stacking_ACGU 0
451 | helix_stacking_ACUA 0
452 | helix_stacking_ACUC 0
453 | helix_stacking_ACUG 0
454 | helix_stacking_ACUU 0
455 | helix_stacking_AGAC 0
456 | helix_stacking_AGAG 0
457 | helix_stacking_AGAU 0
458 | helix_stacking_AGCC 0
459 | helix_stacking_AGCG 0
460 | helix_stacking_AGCU 0
461 | helix_stacking_AGGA 0
462 | helix_stacking_AGGC 0
463 | helix_stacking_AGGG 0
464 | helix_stacking_AGGU 0
465 | helix_stacking_AGUA 0
466 | helix_stacking_AGUC 0
467 | helix_stacking_AGUG 0
468 | helix_stacking_AGUU 0
469 | helix_stacking_AUAC 0
470 | helix_stacking_AUAG 0
471 | helix_stacking_AUAU 0.1482005248
472 | helix_stacking_AUCC 0
473 | helix_stacking_AUCG 0.4343497127
474 | helix_stacking_AUCU 0
475 | helix_stacking_AUGC 0.7079642577
476 | helix_stacking_AUGG 0
477 | helix_stacking_AUGU -0.1010777582
478 | helix_stacking_AUUA 0.243256656
479 | helix_stacking_AUUC 0
480 | helix_stacking_AUUG 0.1623654243
481 | helix_stacking_AUUU 0
482 | helix_stacking_CAAC 0
483 | helix_stacking_CAAG 0
484 | helix_stacking_CAAU 0
485 | helix_stacking_CACC 0
486 | helix_stacking_CACG 0
487 | helix_stacking_CACU 0
488 | helix_stacking_CAGC 0
489 | helix_stacking_CAGG 0
490 | helix_stacking_CAGU 0
491 | helix_stacking_CAUC 0
492 | helix_stacking_CAUG 0
493 | helix_stacking_CAUU 0
494 | helix_stacking_CCAG 0
495 | helix_stacking_CCAU 0
496 | helix_stacking_CCCC 0
497 | helix_stacking_CCCG 0
498 | helix_stacking_CCCU 0
499 | helix_stacking_CCGC 0
500 | helix_stacking_CCGG 0
501 | helix_stacking_CCGU 0
502 | helix_stacking_CCUC 0
503 | helix_stacking_CCUG 0
504 | helix_stacking_CCUU 0
505 | helix_stacking_CGAG 0
506 | helix_stacking_CGAU 0.4878707793
507 | helix_stacking_CGCG 0.8481320247
508 | helix_stacking_CGCU 0
509 | helix_stacking_CGGC 0.4784248478
510 | helix_stacking_CGGG 0
511 | helix_stacking_CGGU -0.1811268205
512 | helix_stacking_CGUC 0
513 | helix_stacking_CGUG 0.4849351028
514 | helix_stacking_CGUU 0
515 | helix_stacking_CUAG 0
516 | helix_stacking_CUAU 0
517 | helix_stacking_CUCG 0
518 | helix_stacking_CUCU 0
519 | helix_stacking_CUGG 0
520 | helix_stacking_CUGU 0
521 | helix_stacking_CUUC 0
522 | helix_stacking_CUUG 0
523 | helix_stacking_CUUU 0
524 | helix_stacking_GAAG 0
525 | helix_stacking_GAAU 0
526 | helix_stacking_GACG 0
527 | helix_stacking_GACU 0
528 | helix_stacking_GAGG 0
529 | helix_stacking_GAGU 0
530 | helix_stacking_GAUG 0
531 | helix_stacking_GAUU 0
532 | helix_stacking_GCAU 0.5551785831
533 | helix_stacking_GCCG 0.5008324248
534 | helix_stacking_GCCU 0
535 | helix_stacking_GCGG 0
536 | helix_stacking_GCGU 0.2165962476
537 | helix_stacking_GCUG 0.4864603589
538 | helix_stacking_GCUU 0
539 | helix_stacking_GGAU 0
540 | helix_stacking_GGCU 0
541 | helix_stacking_GGGG 0
542 | helix_stacking_GGGU 0
543 | helix_stacking_GGUG 0
544 | helix_stacking_GGUU 0
545 | helix_stacking_GUAU -0.04665365028
546 | helix_stacking_GUCU 0
547 | helix_stacking_GUGU 0.1833447295
548 | helix_stacking_GUUG -0.2858970755
549 | helix_stacking_GUUU 0
550 | helix_stacking_UAAU 0.3897593783
551 | helix_stacking_UACU 0
552 | helix_stacking_UAGU -0.1157333764
553 | helix_stacking_UAUU 0
554 | helix_stacking_UCCU 0
555 | helix_stacking_UCGU 0
556 | helix_stacking_UCUU 0
557 | helix_stacking_UGGU 0.120296538
558 | helix_stacking_UGUU 0
559 | helix_stacking_UUUU 0
560 | helix_closing_AA 0
561 | helix_closing_AC 0
562 | helix_closing_AG 0
563 | helix_closing_AU -0.9770893163
564 | helix_closing_CA 0
565 | helix_closing_CC 0
566 | helix_closing_CG -0.4574650937
567 | helix_closing_CU 0
568 | helix_closing_GA 0
569 | helix_closing_GC -0.8265995623
570 | helix_closing_GG 0
571 | helix_closing_GU -1.051678928
572 | helix_closing_UA -0.9246140521
573 | helix_closing_UC 0
574 | helix_closing_UG -0.3698708172
575 | helix_closing_UU 0
576 | multi_base -1.199055076
577 | multi_unpaired -0.1983300391
578 | multi_paired -0.9253883752
579 | dangle_left_AAA 0
580 | dangle_left_AAC 0
581 | dangle_left_AAG 0
582 | dangle_left_AAU 0
583 | dangle_left_ACA 0
584 | dangle_left_ACC 0
585 | dangle_left_ACG 0
586 | dangle_left_ACU 0
587 | dangle_left_AGA 0
588 | dangle_left_AGC 0
589 | dangle_left_AGG 0
590 | dangle_left_AGU 0
591 | dangle_left_AUA -0.1251037681
592 | dangle_left_AUC 0.0441606708
593 | dangle_left_AUG -0.02541879082
594 | dangle_left_AUU 0.00785098466
595 | dangle_left_CAA 0
596 | dangle_left_CAC 0
597 | dangle_left_CAG 0
598 | dangle_left_CAU 0
599 | dangle_left_CCA 0
600 | dangle_left_CCC 0
601 | dangle_left_CCG 0
602 | dangle_left_CCU 0
603 | dangle_left_CGA 0.07224381372
604 | dangle_left_CGC 0.05279281874
605 | dangle_left_CGG 0.1009554299
606 | dangle_left_CGU -0.1515059013
607 | dangle_left_CUA 0
608 | dangle_left_CUC 0
609 | dangle_left_CUG 0
610 | dangle_left_CUU 0
611 | dangle_left_GAA 0
612 | dangle_left_GAC 0
613 | dangle_left_GAG 0
614 | dangle_left_GAU 0
615 | dangle_left_GCA -0.1829535099
616 | dangle_left_GCC 0.03393000394
617 | dangle_left_GCG 0.1335339061
618 | dangle_left_GCU -0.1604274506
619 | dangle_left_GGA 0
620 | dangle_left_GGC 0
621 | dangle_left_GGG 0
622 | dangle_left_GGU 0
623 | dangle_left_GUA -0.06517511341
624 | dangle_left_GUC -0.04250882422
625 | dangle_left_GUG 0.02875971806
626 | dangle_left_GUU -0.04359727428
627 | dangle_left_UAA -0.03373847659
628 | dangle_left_UAC -0.005070324324
629 | dangle_left_UAG -0.1186861149
630 | dangle_left_UAU -0.01162357727
631 | dangle_left_UCA 0
632 | dangle_left_UCC 0
633 | dangle_left_UCG 0
634 | dangle_left_UCU 0
635 | dangle_left_UGA -0.08047139148
636 | dangle_left_UGC 0.001608000669
637 | dangle_left_UGG 0.1016272216
638 | dangle_left_UGU -0.09200842832
639 | dangle_left_UUA 0
640 | dangle_left_UUC 0
641 | dangle_left_UUG 0
642 | dangle_left_UUU 0
643 | dangle_right_AAA 0
644 | dangle_right_AAC 0
645 | dangle_right_AAG 0
646 | dangle_right_AAU 0
647 | dangle_right_ACA 0
648 | dangle_right_ACC 0
649 | dangle_right_ACG 0
650 | dangle_right_ACU 0
651 | dangle_right_AGA 0
652 | dangle_right_AGC 0
653 | dangle_right_AGG 0
654 | dangle_right_AGU 0
655 | dangle_right_AUA 0.03232578201
656 | dangle_right_AUC -0.09096819493
657 | dangle_right_AUG -0.0740750973
658 | dangle_right_AUU -0.01621157379
659 | dangle_right_CAA 0
660 | dangle_right_CAC 0
661 | dangle_right_CAG 0
662 | dangle_right_CAU 0
663 | dangle_right_CCA 0
664 | dangle_right_CCC 0
665 | dangle_right_CCG 0
666 | dangle_right_CCU 0
667 | dangle_right_CGA 0.2133964379
668 | dangle_right_CGC -0.06234810991
669 | dangle_right_CGG -0.07008531041
670 | dangle_right_CGU -0.2141912285
671 | dangle_right_CUA 0
672 | dangle_right_CUC 0
673 | dangle_right_CUG 0
674 | dangle_right_CUU 0
675 | dangle_right_GAA 0
676 | dangle_right_GAC 0
677 | dangle_right_GAG 0
678 | dangle_right_GAU 0
679 | dangle_right_GCA 0.01581957549
680 | dangle_right_GCC 0.005644320058
681 | dangle_right_GCG -0.00943297687
682 | dangle_right_GCU -0.2597793095
683 | dangle_right_GGA 0
684 | dangle_right_GGC 0
685 | dangle_right_GGG 0
686 | dangle_right_GGU 0
687 | dangle_right_GUA -0.04480271781
688 | dangle_right_GUC -0.07321213002
689 | dangle_right_GUG 0.01270494867
690 | dangle_right_GUU -0.05717033985
691 | dangle_right_UAA -0.1631918513
692 | dangle_right_UAC 0.06769304994
693 | dangle_right_UAG -0.08789074414
694 | dangle_right_UAU -0.05525570007
695 | dangle_right_UCA 0
696 | dangle_right_UCC 0
697 | dangle_right_UCG 0
698 | dangle_right_UCU 0
699 | dangle_right_UGA 0.04105458185
700 | dangle_right_UGC -0.008136642572
701 | dangle_right_UGG -0.03808592022
702 | dangle_right_UGU -0.08629373429
703 | dangle_right_UUA 0
704 | dangle_right_UUC 0
705 | dangle_right_UUG 0
706 | dangle_right_UUU 0
707 | external_unpaired -0.00972883093
708 | external_paired -0.0009674111431
709 |
--------------------------------------------------------------------------------
/parameter_files/learntofold.contrafold.params:
--------------------------------------------------------------------------------
1 | base_pair_AA 0
2 | base_pair_AC 0
3 | base_pair_AG 0
4 | base_pair_AU 0.117196
5 | base_pair_CC 0
6 | base_pair_CG 0.42785
7 | base_pair_CU 0
8 | base_pair_GG 0
9 | base_pair_GU -0.144535
10 | base_pair_UU 0
11 | terminal_mismatch_AAAA 0
12 | terminal_mismatch_AAAC 0
13 | terminal_mismatch_AAAG 0
14 | terminal_mismatch_AAAU 0
15 | terminal_mismatch_AACA 0
16 | terminal_mismatch_AACC 0
17 | terminal_mismatch_AACG 0
18 | terminal_mismatch_AACU 0
19 | terminal_mismatch_AAGA 0
20 | terminal_mismatch_AAGC 0
21 | terminal_mismatch_AAGG 0
22 | terminal_mismatch_AAGU 0
23 | terminal_mismatch_AAUA 0
24 | terminal_mismatch_AAUC 0
25 | terminal_mismatch_AAUG 0
26 | terminal_mismatch_AAUU 0
27 | terminal_mismatch_ACAA 0
28 | terminal_mismatch_ACAC 0
29 | terminal_mismatch_ACAG 0
30 | terminal_mismatch_ACAU 0
31 | terminal_mismatch_ACCA 0
32 | terminal_mismatch_ACCC 0
33 | terminal_mismatch_ACCG 0
34 | terminal_mismatch_ACCU 0
35 | terminal_mismatch_ACGA 0
36 | terminal_mismatch_ACGC 0
37 | terminal_mismatch_ACGG 0
38 | terminal_mismatch_ACGU 0
39 | terminal_mismatch_ACUA 0
40 | terminal_mismatch_ACUC 0
41 | terminal_mismatch_ACUG 0
42 | terminal_mismatch_ACUU 0
43 | terminal_mismatch_AGAA 0
44 | terminal_mismatch_AGAC 0
45 | terminal_mismatch_AGAG 0
46 | terminal_mismatch_AGAU 0
47 | terminal_mismatch_AGCA 0
48 | terminal_mismatch_AGCC 0
49 | terminal_mismatch_AGCG 0
50 | terminal_mismatch_AGCU 0
51 | terminal_mismatch_AGGA 0
52 | terminal_mismatch_AGGC 0
53 | terminal_mismatch_AGGG 0
54 | terminal_mismatch_AGGU 0
55 | terminal_mismatch_AGUA 0
56 | terminal_mismatch_AGUC 0
57 | terminal_mismatch_AGUG 0
58 | terminal_mismatch_AGUU 0
59 | terminal_mismatch_AUAA -0.168158
60 | terminal_mismatch_AUAC -0.242468
61 | terminal_mismatch_AUAG -0.171538
62 | terminal_mismatch_AUAU 0.063824
63 | terminal_mismatch_AUCA -0.136324
64 | terminal_mismatch_AUCC 0.0340154
65 | terminal_mismatch_AUCG 0.412095
66 | terminal_mismatch_AUCU -0.158066
67 | terminal_mismatch_AUGA 0.235308
68 | terminal_mismatch_AUGC 0.446161
69 | terminal_mismatch_AUGG -0.31236
70 | terminal_mismatch_AUGU -0.174198
71 | terminal_mismatch_AUUA 0.427164
72 | terminal_mismatch_AUUC 0.351693
73 | terminal_mismatch_AUUG 0.112834
74 | terminal_mismatch_AUUU 0.0114197
75 | terminal_mismatch_CAAA 0
76 | terminal_mismatch_CAAC 0
77 | terminal_mismatch_CAAG 0
78 | terminal_mismatch_CAAU 0
79 | terminal_mismatch_CACA 0
80 | terminal_mismatch_CACC 0
81 | terminal_mismatch_CACG 0
82 | terminal_mismatch_CACU 0
83 | terminal_mismatch_CAGA 0
84 | terminal_mismatch_CAGC 0
85 | terminal_mismatch_CAGG 0
86 | terminal_mismatch_CAGU 0
87 | terminal_mismatch_CAUA 0
88 | terminal_mismatch_CAUC 0
89 | terminal_mismatch_CAUG 0
90 | terminal_mismatch_CAUU 0
91 | terminal_mismatch_CCAA 0
92 | terminal_mismatch_CCAC 0
93 | terminal_mismatch_CCAG 0
94 | terminal_mismatch_CCAU 0
95 | terminal_mismatch_CCCA 0
96 | terminal_mismatch_CCCC 0
97 | terminal_mismatch_CCCG 0
98 | terminal_mismatch_CCCU 0
99 | terminal_mismatch_CCGA 0
100 | terminal_mismatch_CCGC 0
101 | terminal_mismatch_CCGG 0
102 | terminal_mismatch_CCGU 0
103 | terminal_mismatch_CCUA 0
104 | terminal_mismatch_CCUC 0
105 | terminal_mismatch_CCUG 0
106 | terminal_mismatch_CCUU 0
107 | terminal_mismatch_CGAA -0.109134
108 | terminal_mismatch_CGAC -0.316447
109 | terminal_mismatch_CGAG -0.62242
110 | terminal_mismatch_CGAU 0.0216624
111 | terminal_mismatch_CGCA 0.0388758
112 | terminal_mismatch_CGCC -0.281257
113 | terminal_mismatch_CGCG 0.241614
114 | terminal_mismatch_CGCU -0.397997
115 | terminal_mismatch_CGGA 0.327717
116 | terminal_mismatch_CGGC 0.110783
117 | terminal_mismatch_CGGG -0.527171
118 | terminal_mismatch_CGGU -0.429919
119 | terminal_mismatch_CGUA 0.171414
120 | terminal_mismatch_CGUC -0.279608
121 | terminal_mismatch_CGUG 0.100497
122 | terminal_mismatch_CGUU -0.248438
123 | terminal_mismatch_CUAA 0
124 | terminal_mismatch_CUAC 0
125 | terminal_mismatch_CUAG 0
126 | terminal_mismatch_CUAU 0
127 | terminal_mismatch_CUCA 0
128 | terminal_mismatch_CUCC 0
129 | terminal_mismatch_CUCG 0
130 | terminal_mismatch_CUCU 0
131 | terminal_mismatch_CUGA 0
132 | terminal_mismatch_CUGC 0
133 | terminal_mismatch_CUGG 0
134 | terminal_mismatch_CUGU 0
135 | terminal_mismatch_CUUA 0
136 | terminal_mismatch_CUUC 0
137 | terminal_mismatch_CUUG 0
138 | terminal_mismatch_CUUU 0
139 | terminal_mismatch_GAAA 0
140 | terminal_mismatch_GAAC 0
141 | terminal_mismatch_GAAG 0
142 | terminal_mismatch_GAAU 0
143 | terminal_mismatch_GACA 0
144 | terminal_mismatch_GACC 0
145 | terminal_mismatch_GACG 0
146 | terminal_mismatch_GACU 0
147 | terminal_mismatch_GAGA 0
148 | terminal_mismatch_GAGC 0
149 | terminal_mismatch_GAGG 0
150 | terminal_mismatch_GAGU 0
151 | terminal_mismatch_GAUA 0
152 | terminal_mismatch_GAUC 0
153 | terminal_mismatch_GAUG 0
154 | terminal_mismatch_GAUU 0
155 | terminal_mismatch_GCAA -0.566345
156 | terminal_mismatch_GCAC -0.0306717
157 | terminal_mismatch_GCAG -0.266614
158 | terminal_mismatch_GCAU -0.154598
159 | terminal_mismatch_GCCA -0.316693
160 | terminal_mismatch_GCCC -0.131361
161 | terminal_mismatch_GCCG 0.363139
162 | terminal_mismatch_GCCU -0.41638
163 | terminal_mismatch_GCGA 0.353058
164 | terminal_mismatch_GCGC 0.368934
165 | terminal_mismatch_GCGG -0.0630469
166 | terminal_mismatch_GCGU -0.255096
167 | terminal_mismatch_GCUA 0.0728846
168 | terminal_mismatch_GCUC -0.0480102
169 | terminal_mismatch_GCUG 0.374379
170 | terminal_mismatch_GCUU 0.0624913
171 | terminal_mismatch_GGAA 0
172 | terminal_mismatch_GGAC 0
173 | terminal_mismatch_GGAG 0
174 | terminal_mismatch_GGAU 0
175 | terminal_mismatch_GGCA 0
176 | terminal_mismatch_GGCC 0
177 | terminal_mismatch_GGCG 0
178 | terminal_mismatch_GGCU 0
179 | terminal_mismatch_GGGA 0
180 | terminal_mismatch_GGGC 0
181 | terminal_mismatch_GGGG 0
182 | terminal_mismatch_GGGU 0
183 | terminal_mismatch_GGUA 0
184 | terminal_mismatch_GGUC 0
185 | terminal_mismatch_GGUG 0
186 | terminal_mismatch_GGUU 0
187 | terminal_mismatch_GUAA -0.22414
188 | terminal_mismatch_GUAC -0.133311
189 | terminal_mismatch_GUAG -0.359489
190 | terminal_mismatch_GUAU -0.330393
191 | terminal_mismatch_GUCA 0.0365249
192 | terminal_mismatch_GUCC 0.0615222
193 | terminal_mismatch_GUCG 0.290182
194 | terminal_mismatch_GUCU -0.176866
195 | terminal_mismatch_GUGA -0.19437
196 | terminal_mismatch_GUGC 0.0494159
197 | terminal_mismatch_GUGG -0.203475
198 | terminal_mismatch_GUGU -0.171151
199 | terminal_mismatch_GUUA 0.0401032
200 | terminal_mismatch_GUUC -0.105719
201 | terminal_mismatch_GUUG -0.302561
202 | terminal_mismatch_GUUU -0.445895
203 | terminal_mismatch_UAAA -0.586072
204 | terminal_mismatch_UAAC -0.61438
205 | terminal_mismatch_UAAG -0.405239
206 | terminal_mismatch_UAAU -0.133188
207 | terminal_mismatch_UACA -0.616378
208 | terminal_mismatch_UACC -0.624385
209 | terminal_mismatch_UACG -0.258873
210 | terminal_mismatch_UACU -0.681676
211 | terminal_mismatch_UAGA -0.342396
212 | terminal_mismatch_UAGC 0.239263
213 | terminal_mismatch_UAGG -0.667443
214 | terminal_mismatch_UAGU -0.766636
215 | terminal_mismatch_UAUA -0.308715
216 | terminal_mismatch_UAUC -0.00697584
217 | terminal_mismatch_UAUG -0.502953
218 | terminal_mismatch_UAUU -0.528393
219 | terminal_mismatch_UCAA 0
220 | terminal_mismatch_UCAC 0
221 | terminal_mismatch_UCAG 0
222 | terminal_mismatch_UCAU 0
223 | terminal_mismatch_UCCA 0
224 | terminal_mismatch_UCCC 0
225 | terminal_mismatch_UCCG 0
226 | terminal_mismatch_UCCU 0
227 | terminal_mismatch_UCGA 0
228 | terminal_mismatch_UCGC 0
229 | terminal_mismatch_UCGG 0
230 | terminal_mismatch_UCGU 0
231 | terminal_mismatch_UCUA 0
232 | terminal_mismatch_UCUC 0
233 | terminal_mismatch_UCUG 0
234 | terminal_mismatch_UCUU 0
235 | terminal_mismatch_UGAA 0.0753088
236 | terminal_mismatch_UGAC 0.27512
237 | terminal_mismatch_UGAG -0.050858
238 | terminal_mismatch_UGAU 0.192983
239 | terminal_mismatch_UGCA 0.443018
240 | terminal_mismatch_UGCC 0.0480001
241 | terminal_mismatch_UGCG 0.497822
242 | terminal_mismatch_UGCU 0.157055
243 | terminal_mismatch_UGGA 0.836611
244 | terminal_mismatch_UGGC 0.282301
245 | terminal_mismatch_UGGG 0.0988858
246 | terminal_mismatch_UGGU 0.234094
247 | terminal_mismatch_UGUA 0.114609
248 | terminal_mismatch_UGUC 0.214683
249 | terminal_mismatch_UGUG 0.246988
250 | terminal_mismatch_UGUU 0.371336
251 | terminal_mismatch_UUAA 0
252 | terminal_mismatch_UUAC 0
253 | terminal_mismatch_UUAG 0
254 | terminal_mismatch_UUAU 0
255 | terminal_mismatch_UUCA 0
256 | terminal_mismatch_UUCC 0
257 | terminal_mismatch_UUCG 0
258 | terminal_mismatch_UUCU 0
259 | terminal_mismatch_UUGA 0
260 | terminal_mismatch_UUGC 0
261 | terminal_mismatch_UUGG 0
262 | terminal_mismatch_UUGU 0
263 | terminal_mismatch_UUUA 0
264 | terminal_mismatch_UUUC 0
265 | terminal_mismatch_UUUG 0
266 | terminal_mismatch_UUUU 0
267 | hairpin_length_at_least_0 -1.84406
268 | hairpin_length_at_least_1 0.38098
269 | hairpin_length_at_least_2 0.859909
270 | hairpin_length_at_least_3 0.295419
271 | hairpin_length_at_least_4 0.7661
272 | hairpin_length_at_least_5 -0.338749
273 | hairpin_length_at_least_6 -0.0639211
274 | hairpin_length_at_least_7 0.315558
275 | hairpin_length_at_least_8 -0.362892
276 | hairpin_length_at_least_9 -0.176655
277 | hairpin_length_at_least_10 -0.263635
278 | hairpin_length_at_least_11 -0.129676
279 | hairpin_length_at_least_12 0.105682
280 | hairpin_length_at_least_13 0.08146
281 | hairpin_length_at_least_14 -0.855376
282 | hairpin_length_at_least_15 -0.0377099
283 | hairpin_length_at_least_16 0.0421525
284 | hairpin_length_at_least_17 0.107804
285 | hairpin_length_at_least_18 -0.216865
286 | hairpin_length_at_least_19 -0.0181023
287 | hairpin_length_at_least_20 -0.225869
288 | hairpin_length_at_least_21 -0.181939
289 | hairpin_length_at_least_22 0.0310624
290 | hairpin_length_at_least_23 -0.0905128
291 | hairpin_length_at_least_24 -0.306419
292 | hairpin_length_at_least_25 -0.13717
293 | hairpin_length_at_least_26 0.132407
294 | hairpin_length_at_least_27 -0.130469
295 | hairpin_length_at_least_28 -0.0067091
296 | hairpin_length_at_least_29 -0.115291
297 | hairpin_length_at_least_30 -0.39803
298 | internal_explicit_1_1 0.155859
299 | internal_explicit_1_2 -0.121667
300 | internal_explicit_1_3 0.0100364
301 | internal_explicit_1_4 0.199334
302 | internal_explicit_2_2 0.130952
303 | internal_explicit_2_3 -0.187011
304 | internal_explicit_2_4 -0.110813
305 | internal_explicit_3_3 0.0529937
306 | internal_explicit_3_4 -0.357182
307 | internal_explicit_4_4 0.12988
308 | bulge_length_at_least_1 -0.10609
309 | bulge_length_at_least_2 -0.294864
310 | bulge_length_at_least_3 -0.36619
311 | bulge_length_at_least_4 -0.577635
312 | bulge_length_at_least_5 -0.404122
313 | bulge_length_at_least_6 -0.508964
314 | bulge_length_at_least_7 -0.0211596
315 | bulge_length_at_least_8 0.749466
316 | bulge_length_at_least_9 -0.532326
317 | bulge_length_at_least_10 -0.585856
318 | bulge_length_at_least_11 -0.356308
319 | bulge_length_at_least_12 0.119846
320 | bulge_length_at_least_13 0.25548
321 | bulge_length_at_least_14 0.146516
322 | bulge_length_at_least_15 -0.546997
323 | bulge_length_at_least_16 0.147717
324 | bulge_length_at_least_17 0.0178208
325 | bulge_length_at_least_18 0.0080868
326 | bulge_length_at_least_19 0.456916
327 | bulge_length_at_least_20 -0.42458
328 | bulge_length_at_least_21 0.145037
329 | bulge_length_at_least_22 -0.105019
330 | bulge_length_at_least_23 -0.342105
331 | bulge_length_at_least_24 -0.0779023
332 | bulge_length_at_least_25 -0.193858
333 | bulge_length_at_least_26 -0.00769006
334 | bulge_length_at_least_27 -0.111807
335 | bulge_length_at_least_28 0.155611
336 | bulge_length_at_least_29 0.335468
337 | bulge_length_at_least_30 1.18348
338 | internal_length_at_least_2 0.0141383
339 | internal_length_at_least_3 -0.0934192
340 | internal_length_at_least_4 -0.0617787
341 | internal_length_at_least_5 -0.115015
342 | internal_length_at_least_6 -0.100272
343 | internal_length_at_least_7 0.260368
344 | internal_length_at_least_8 -0.258777
345 | internal_length_at_least_9 0.0776641
346 | internal_length_at_least_10 -0.249379
347 | internal_length_at_least_11 0.0528477
348 | internal_length_at_least_12 -0.478489
349 | internal_length_at_least_13 -0.106756
350 | internal_length_at_least_14 -0.000894333
351 | internal_length_at_least_15 -0.334079
352 | internal_length_at_least_16 0.0711885
353 | internal_length_at_least_17 -0.203494
354 | internal_length_at_least_18 0.253692
355 | internal_length_at_least_19 -0.232494
356 | internal_length_at_least_20 0.358359
357 | internal_length_at_least_21 -0.366355
358 | internal_length_at_least_22 0.245564
359 | internal_length_at_least_23 -0.489612
360 | internal_length_at_least_24 0.262947
361 | internal_length_at_least_25 -0.433761
362 | internal_length_at_least_26 0.0245611
363 | internal_length_at_least_27 -0.128352
364 | internal_length_at_least_28 0.100132
365 | internal_length_at_least_29 -0.208747
366 | internal_length_at_least_30 0.827826
367 | internal_symmetric_length_at_least_1 0.0656625
368 | internal_symmetric_length_at_least_2 -0.087095
369 | internal_symmetric_length_at_least_3 -0.0711241
370 | internal_symmetric_length_at_least_4 0.0126792
371 | internal_symmetric_length_at_least_5 -0.233107
372 | internal_symmetric_length_at_least_6 -0.112285
373 | internal_symmetric_length_at_least_7 -0.120892
374 | internal_symmetric_length_at_least_8 0.0783225
375 | internal_symmetric_length_at_least_9 -0.120047
376 | internal_symmetric_length_at_least_10 -0.44724
377 | internal_symmetric_length_at_least_11 -0.0132272
378 | internal_symmetric_length_at_least_12 -0.118194
379 | internal_symmetric_length_at_least_13 0.0859623
380 | internal_symmetric_length_at_least_14 -0.178603
381 | internal_symmetric_length_at_least_15 -0.178603
382 | internal_asymmetry_at_least_1 -0.0748923
383 | internal_asymmetry_at_least_2 -0.382543
384 | internal_asymmetry_at_least_3 -0.251796
385 | internal_asymmetry_at_least_4 -0.421874
386 | internal_asymmetry_at_least_5 -0.34332
387 | internal_asymmetry_at_least_6 -0.115644
388 | internal_asymmetry_at_least_7 -0.165334
389 | internal_asymmetry_at_least_8 0.197739
390 | internal_asymmetry_at_least_9 -0.186715
391 | internal_asymmetry_at_least_10 0.076971
392 | internal_asymmetry_at_least_11 0.0362528
393 | internal_asymmetry_at_least_12 -0.220953
394 | internal_asymmetry_at_least_13 0.108824
395 | internal_asymmetry_at_least_14 -0.0164457
396 | internal_asymmetry_at_least_15 0.368713
397 | internal_asymmetry_at_least_16 -0.438663
398 | internal_asymmetry_at_least_17 0.16405
399 | internal_asymmetry_at_least_18 -0.0398533
400 | internal_asymmetry_at_least_19 0.1949
401 | internal_asymmetry_at_least_20 0.0771696
402 | internal_asymmetry_at_least_21 0.41823
403 | internal_asymmetry_at_least_22 -0.632993
404 | internal_asymmetry_at_least_23 -0.116177
405 | internal_asymmetry_at_least_24 -0.12073
406 | internal_asymmetry_at_least_25 0.0344756
407 | internal_asymmetry_at_least_26 -0.0637855
408 | internal_asymmetry_at_least_27 0.264182
409 | internal_asymmetry_at_least_28 0.393391
410 | bulge_0x1_nucleotides_A 0.0265834
411 | bulge_0x1_nucleotides_C 0.187646
412 | bulge_0x1_nucleotides_G 0.213565
413 | bulge_0x1_nucleotides_U 0.139233
414 | internal_1x1_nucleotides_AA 0.115743
415 | internal_1x1_nucleotides_AC 0.0287969
416 | internal_1x1_nucleotides_AG -0.142761
417 | internal_1x1_nucleotides_AU 0.780265
418 | internal_1x1_nucleotides_CC 0.0215604
419 | internal_1x1_nucleotides_CG 0.834524
420 | internal_1x1_nucleotides_CU 0.0301214
421 | internal_1x1_nucleotides_GG 0.220881
422 | internal_1x1_nucleotides_GU 0.608098
423 | internal_1x1_nucleotides_UU 0.161178
424 | helix_stacking_AAAA 0
425 | helix_stacking_AAAC 0
426 | helix_stacking_AAAG 0
427 | helix_stacking_AAAU 0
428 | helix_stacking_AACA 0
429 | helix_stacking_AACC 0
430 | helix_stacking_AACG 0
431 | helix_stacking_AACU 0
432 | helix_stacking_AAGA 0
433 | helix_stacking_AAGC 0
434 | helix_stacking_AAGG 0
435 | helix_stacking_AAGU 0
436 | helix_stacking_AAUA 0
437 | helix_stacking_AAUC 0
438 | helix_stacking_AAUG 0
439 | helix_stacking_AAUU 0
440 | helix_stacking_ACAC 0
441 | helix_stacking_ACAG 0
442 | helix_stacking_ACAU 0
443 | helix_stacking_ACCA 0
444 | helix_stacking_ACCC 0
445 | helix_stacking_ACCG 0
446 | helix_stacking_ACCU 0
447 | helix_stacking_ACGA 0
448 | helix_stacking_ACGC 0
449 | helix_stacking_ACGG 0
450 | helix_stacking_ACGU 0
451 | helix_stacking_ACUA 0
452 | helix_stacking_ACUC 0
453 | helix_stacking_ACUG 0
454 | helix_stacking_ACUU 0
455 | helix_stacking_AGAC 0
456 | helix_stacking_AGAG 0
457 | helix_stacking_AGAU 0
458 | helix_stacking_AGCC 0
459 | helix_stacking_AGCG 0
460 | helix_stacking_AGCU 0
461 | helix_stacking_AGGA 0
462 | helix_stacking_AGGC 0
463 | helix_stacking_AGGG 0
464 | helix_stacking_AGGU 0
465 | helix_stacking_AGUA 0
466 | helix_stacking_AGUC 0
467 | helix_stacking_AGUG 0
468 | helix_stacking_AGUU 0
469 | helix_stacking_AUAC 0
470 | helix_stacking_AUAG 0
471 | helix_stacking_AUAU 0.166949
472 | helix_stacking_AUCC 0
473 | helix_stacking_AUCG 0.457814
474 | helix_stacking_AUCU 0
475 | helix_stacking_AUGC 0.625282
476 | helix_stacking_AUGG 0
477 | helix_stacking_AUGU -0.0635901
478 | helix_stacking_AUUA 0.484831
479 | helix_stacking_AUUC 0
480 | helix_stacking_AUUG 0.229207
481 | helix_stacking_AUUU 0
482 | helix_stacking_CAAC 0
483 | helix_stacking_CAAG 0
484 | helix_stacking_CAAU 0
485 | helix_stacking_CACC 0
486 | helix_stacking_CACG 0
487 | helix_stacking_CACU 0
488 | helix_stacking_CAGC 0
489 | helix_stacking_CAGG 0
490 | helix_stacking_CAGU 0
491 | helix_stacking_CAUC 0
492 | helix_stacking_CAUG 0
493 | helix_stacking_CAUU 0
494 | helix_stacking_CCAG 0
495 | helix_stacking_CCAU 0
496 | helix_stacking_CCCC 0
497 | helix_stacking_CCCG 0
498 | helix_stacking_CCCU 0
499 | helix_stacking_CCGC 0
500 | helix_stacking_CCGG 0
501 | helix_stacking_CCGU 0
502 | helix_stacking_CCUC 0
503 | helix_stacking_CCUG 0
504 | helix_stacking_CCUU 0
505 | helix_stacking_CGAG 0
506 | helix_stacking_CGAU 0.60886
507 | helix_stacking_CGCG 0.927152
508 | helix_stacking_CGCU 0
509 | helix_stacking_CGGC 0.483599
510 | helix_stacking_CGGG 0
511 | helix_stacking_CGGU 0.00568172
512 | helix_stacking_CGUC 0
513 | helix_stacking_CGUG 0.370247
514 | helix_stacking_CGUU 0
515 | helix_stacking_CUAG 0
516 | helix_stacking_CUAU 0
517 | helix_stacking_CUCG 0
518 | helix_stacking_CUCU 0
519 | helix_stacking_CUGG 0
520 | helix_stacking_CUGU 0
521 | helix_stacking_CUUC 0
522 | helix_stacking_CUUG 0
523 | helix_stacking_CUUU 0
524 | helix_stacking_GAAG 0
525 | helix_stacking_GAAU 0
526 | helix_stacking_GACG 0
527 | helix_stacking_GACU 0
528 | helix_stacking_GAGG 0
529 | helix_stacking_GAGU 0
530 | helix_stacking_GAUG 0
531 | helix_stacking_GAUU 0
532 | helix_stacking_GCAU 0.342121
533 | helix_stacking_GCCG 0.77176
534 | helix_stacking_GCCU 0
535 | helix_stacking_GCGG 0
536 | helix_stacking_GCGU 0.313625
537 | helix_stacking_GCUG 0.474024
538 | helix_stacking_GCUU 0
539 | helix_stacking_GGAU 0
540 | helix_stacking_GGCU 0
541 | helix_stacking_GGGG 0
542 | helix_stacking_GGGU 0
543 | helix_stacking_GGUG 0
544 | helix_stacking_GGUU 0
545 | helix_stacking_GUAU -0.0905706
546 | helix_stacking_GUCU 0
547 | helix_stacking_GUGU 0.175914
548 | helix_stacking_GUUG -0.265254
549 | helix_stacking_GUUU 0
550 | helix_stacking_UAAU 0.285857
551 | helix_stacking_UACU 0
552 | helix_stacking_UAGU -0.0092986
553 | helix_stacking_UAUU 0
554 | helix_stacking_UCCU 0
555 | helix_stacking_UCGU 0
556 | helix_stacking_UCUU 0
557 | helix_stacking_UGGU 0.605438
558 | helix_stacking_UGUU 0
559 | helix_stacking_UUUU 0
560 | helix_closing_AA 0
561 | helix_closing_AC 0
562 | helix_closing_AG 0
563 | helix_closing_AU -0.904257
564 | helix_closing_CA 0
565 | helix_closing_CC 0
566 | helix_closing_CG -0.447655
567 | helix_closing_CU 0
568 | helix_closing_GA 0
569 | helix_closing_GC -0.664996
570 | helix_closing_GG 0
571 | helix_closing_GU -0.551376
572 | helix_closing_UA -0.469223
573 | helix_closing_UC 0
574 | helix_closing_UG -0.690579
575 | helix_closing_UU 0
576 | multi_base 0.392109
577 | multi_unpaired -0.0305723
578 | multi_paired -0.324548
579 | dangle_left_AAA 0
580 | dangle_left_AAC 0
581 | dangle_left_AAG 0
582 | dangle_left_AAU 0
583 | dangle_left_ACA 0
584 | dangle_left_ACC 0
585 | dangle_left_ACG 0
586 | dangle_left_ACU 0
587 | dangle_left_AGA 0
588 | dangle_left_AGC 0
589 | dangle_left_AGG 0
590 | dangle_left_AGU 0
591 | dangle_left_AUA -0.0096949
592 | dangle_left_AUC 0.296587
593 | dangle_left_AUG 0.264354
594 | dangle_left_AUU 0.467729
595 | dangle_left_CAA 0
596 | dangle_left_CAC 0
597 | dangle_left_CAG 0
598 | dangle_left_CAU 0
599 | dangle_left_CCA 0
600 | dangle_left_CCC 0
601 | dangle_left_CCG 0
602 | dangle_left_CCU 0
603 | dangle_left_CGA 0.196253
604 | dangle_left_CGC 0.440535
605 | dangle_left_CGG 0.390397
606 | dangle_left_CGU 0.139024
607 | dangle_left_CUA 0
608 | dangle_left_CUC 0
609 | dangle_left_CUG 0
610 | dangle_left_CUU 0
611 | dangle_left_GAA 0
612 | dangle_left_GAC 0
613 | dangle_left_GAG 0
614 | dangle_left_GAU 0
615 | dangle_left_GCA -0.320284
616 | dangle_left_GCC -0.181196
617 | dangle_left_GCG 0.0390977
618 | dangle_left_GCU 0.175603
619 | dangle_left_GGA 0
620 | dangle_left_GGC 0
621 | dangle_left_GGG 0
622 | dangle_left_GGU 0
623 | dangle_left_GUA -0.0839476
624 | dangle_left_GUC 0.148304
625 | dangle_left_GUG 0.0216176
626 | dangle_left_GUU 0.053797
627 | dangle_left_UAA -0.0866879
628 | dangle_left_UAC -0.250894
629 | dangle_left_UAG -0.322181
630 | dangle_left_UAU -0.0654954
631 | dangle_left_UCA 0
632 | dangle_left_UCC 0
633 | dangle_left_UCG 0
634 | dangle_left_UCU 0
635 | dangle_left_UGA -0.168554
636 | dangle_left_UGC 0.117638
637 | dangle_left_UGG 0.304698
638 | dangle_left_UGU 0.0870223
639 | dangle_left_UUA 0
640 | dangle_left_UUC 0
641 | dangle_left_UUG 0
642 | dangle_left_UUU 0
643 | dangle_right_AAA 0
644 | dangle_right_AAC 0
645 | dangle_right_AAG 0
646 | dangle_right_AAU 0
647 | dangle_right_ACA 0
648 | dangle_right_ACC 0
649 | dangle_right_ACG 0
650 | dangle_right_ACU 0
651 | dangle_right_AGA 0
652 | dangle_right_AGC 0
653 | dangle_right_AGG 0
654 | dangle_right_AGU 0
655 | dangle_right_AUA -0.927456
656 | dangle_right_AUC -1.10559
657 | dangle_right_AUG -0.981522
658 | dangle_right_AUU -0.995162
659 | dangle_right_CAA 0
660 | dangle_right_CAC 0
661 | dangle_right_CAG 0
662 | dangle_right_CAU 0
663 | dangle_right_CCA 0
664 | dangle_right_CCC 0
665 | dangle_right_CCG 0
666 | dangle_right_CCU 0
667 | dangle_right_CGA -0.82867
668 | dangle_right_CGC -1.11699
669 | dangle_right_CGG -1.23095
670 | dangle_right_CGU -1.23702
671 | dangle_right_CUA 0
672 | dangle_right_CUC 0
673 | dangle_right_CUG 0
674 | dangle_right_CUU 0
675 | dangle_right_GAA 0
676 | dangle_right_GAC 0
677 | dangle_right_GAG 0
678 | dangle_right_GAU 0
679 | dangle_right_GCA -0.532095
680 | dangle_right_GCC -0.54946
681 | dangle_right_GCG -0.398636
682 | dangle_right_GCU -0.868356
683 | dangle_right_GGA 0
684 | dangle_right_GGC 0
685 | dangle_right_GGG 0
686 | dangle_right_GGU 0
687 | dangle_right_GUA -0.908315
688 | dangle_right_GUC -0.876077
689 | dangle_right_GUG -0.991237
690 | dangle_right_GUU -1.08336
691 | dangle_right_UAA -1.04753
692 | dangle_right_UAC -0.918508
693 | dangle_right_UAG -1.1966
694 | dangle_right_UAU -1.07818
695 | dangle_right_UCA 0
696 | dangle_right_UCC 0
697 | dangle_right_UCG 0
698 | dangle_right_UCU 0
699 | dangle_right_UGA -0.463016
700 | dangle_right_UGC -0.463076
701 | dangle_right_UGG -0.779374
702 | dangle_right_UGU -0.559652
703 | dangle_right_UUA 0
704 | dangle_right_UUC 0
705 | dangle_right_UUG 0
706 | dangle_right_UUU 0
707 | external_unpaired -0.144898
708 | external_paired -1.54974
709 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 |
2 | [project]
3 | name = "arnie"
4 | version = "0.2.7"
5 | authors = [
6 | { name="Das Lab", email="thedaslab@stanford.edu" },
7 | ]
8 | description = "A Python utility library to estimate, compare, and reweight RNA energetics across many secondary structure algorithms."
9 | readme = "README.md"
10 | requires-python = ">=3.7"
11 | classifiers = [
12 | "Programming Language :: Python :: 3",
13 | "License :: OSI Approved :: MIT License",
14 | "Operating System :: OS Independent",
15 | ]
16 | keywords = ["RNA", "RNA structure prediction", "Bioinformatics"]
17 | dependencies = [
18 | "numpy>=1.15",
19 | "scipy>=1.5.0",
20 | "matplotlib>=3.0.0"
21 | ]
22 |
23 | [project.urls]
24 | "Homepage" = "https://github.com/DasLab/arnie"
25 | "Documentation" = "https://daslab.github.io/arnie"
26 | "Bug Tracker" = "https://github.com/DasLab/arnie/issues"
27 |
28 | [build-system]
29 | requires = ["setuptools>=61.0"]
30 | build-backend = "setuptools.build_meta"
31 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | pythonpath = src
--------------------------------------------------------------------------------
/scripts/score_pseudoacc_mea.py:
--------------------------------------------------------------------------------
1 | from arnie.mea.mea import *
2 | import numpy as np
3 | from glob import glob
4 | import argparse
5 | import sys, os
6 |
def predict_MEA_structures(matrix_list, gamma_min=-7, gamma_max=7, verbose=False, metric='mcc', output_dir='MEA_output'):
    '''Estimate maximum expected pseudoaccuracy structures per Hamada et al. BMC Bioinf 2010 11:586.

    For every matrix file, an MEA structure is built at each integer
    log_2(gamma) in range(gamma_min, gamma_max) and the gamma with the best
    expected value of `metric` is kept. The best structure of every construct
    is written to `<output_dir>/<basename>.dbn`.

    Note: Files in matrix_list and true_structs need to have the same names corresponding
    to their same constructs, but suffixes don't matter.

    Inputs:

    matrix_list: list of paths to NxN base pair probability matrix files (read with np.loadtxt).
    gamma_min, gamma_max: bounds on log_2(gamma), defaults are -7 and 7.
        The scan is range(gamma_min, gamma_max), so gamma_max itself is not evaluated.
    metric: keyword-based, which metric to use to select structure. Options are 'sen', 'ppv', 'mcc', 'fscore'.
    verbose: print the per-construct best gamma/structure or not (for command line use).
        The per-gamma averages are always printed.
    output_dir: directory the predicted structures are written to (created if missing).

    Outputs:
    List with the best predicted structure of each input matrix, in input order.

    Raises:
    ValueError: unknown metric, empty matrix_list, or empty gamma range.
    '''

    metric_ind = ['sen', 'ppv', 'mcc', 'fscore'].index(metric)

    if len(matrix_list) == 0:
        raise ValueError('No matrix files found!')

    gamma_vals = list(range(gamma_min, gamma_max))
    if not gamma_vals:
        raise ValueError('gamma_min must be smaller than gamma_max.')

    matrices = [np.loadtxt(x) for x in matrix_list]
    pdb_indices = [os.path.basename(x).split('.')[0] for x in matrix_list]

    best_gammas, best_structs, best_metrics = [], [], []

    metrics_across_gammas = {k: [] for k in gamma_vals}

    if verbose: print('\nmetric\tpdb_ind\tbest_log2g\tbest_metric_value\tbest_struct')

    for i, matrix in enumerate(matrices):

        running_best_metrics = []
        # Start below every possible score so that the first gamma is always
        # recorded; starting at 0 left the metrics empty (and broke the
        # averaging below) for constructs that never score above 0.
        running_best_value = float('-inf')
        running_best_gamma = -101
        running_best_struct = ''

        for g in gamma_vals:

            mea_cls = MEA(matrix, gamma=2**g)

            metrics = mea_cls.score_expected()  # sen, ppv, mcc, fscore
            metrics_across_gammas[g].append(metrics)

            if metrics[metric_ind] > running_best_value:
                running_best_value = metrics[metric_ind]
                running_best_metrics = metrics
                running_best_gamma = g
                running_best_struct = mea_cls.structure

        best_metrics.append(running_best_metrics)
        best_gammas.append(running_best_gamma)
        best_structs.append(running_best_struct)

        if verbose: print("%s\t%s\t%d\t%.3f\t%s" % (metric, pdb_indices[i], running_best_gamma, running_best_value, running_best_struct))

    # Average metrics across constructs at each gamma value.
    print('\t\tlog2(g)\tsen\tppv\tmcc\tfscore')

    for g in gamma_vals:

        [sen, ppv, mcc, fscore] = np.mean(metrics_across_gammas[g], axis=0)
        print('gamma_avg\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (g, sen, ppv, mcc, fscore))

    # Average metrics when every construct uses its own best gamma.
    [sen, ppv, mcc, fscore] = np.mean(np.array(best_metrics), axis=0)

    print('gamma_best\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % (np.mean(best_gammas), sen, ppv, mcc, fscore))

    os.makedirs(output_dir, exist_ok=True)

    for struct, ind in zip(best_structs, pdb_indices):
        out_path = '%s/%s.dbn' % (output_dir, ind)
        if os.path.exists(out_path):
            print('NB: overwriting existing predicted structure')
        with open(out_path, 'w') as f:
            f.write(struct)

    return best_structs
92 |
def score_against_true_structs(pred_struct_list, true_struct_list, verbose=False, weight_by_n_bps=False):
    '''Score maximum expected pseudoaccuracy structures against provided 3D structures.

    A predicted file is paired with every true-structure path that contains
    the predicted file's base name (text before the first '.'); each pair is
    scored with score_ground_truth and the scores are averaged.

    Note: Predicted and true structure files need to have the same names corresponding
    to their same constructs, but suffixes don't matter.

    Inputs:

    pred_struct_list: list of paths to predicted structures.
    true_struct_list: list of paths to true structures, loaded with
        load_matrix_or_dbn.
    verbose: accepted for command line symmetry; currently unused here
        (scores are always printed).
    weight_by_n_bps: if True, weight each pair by the N returned from
        score_ground_truth; otherwise every pair has equal weight.

    Outputs:

    Tuple (mean SEN, mean PPV, mean MCC, mean Fscore) where
    SEN: TP/(TP+FN)
    PPV: TP/(TP+FP)
    MCC: Mathews correlation coefficient
    Fscore: 2*TP/(2*TP + FP + FN)

    Raises:
    ValueError: an input list is empty, or no predicted file matches any
        true structure file.
    '''
    if len(pred_struct_list) == 0:
        raise ValueError('No predicted structure files found!')

    if len(true_struct_list) == 0:
        raise ValueError('No ground truth structure files found!')

    # Keep the label next to each matched pair. Labels used to be derived from
    # pred_struct_list and indexed by pair number, which mislabelled scores
    # whenever a prediction had no match or more than one match.
    matched = []
    for pred_path in pred_struct_list:
        label = os.path.basename(pred_path).split('.')[0]
        for true_path in true_struct_list:
            if label in true_path:
                pstruct = load_matrix_or_dbn(pred_path)
                struct = load_matrix_or_dbn(true_path)
                matched.append((label, pstruct, struct))

    if not matched:
        raise ValueError('No predicted structure matched any ground truth structure file!')

    tally, ptl_sen, ptl_ppv, ptl_mcc, ptl_fscore = 0, 0, 0, 0, 0

    for label, pstruct, struct in matched:

        sen, ppv, mcc, fscore, N = score_ground_truth(pstruct, struct)
        print('Score:\t%s\t%.3f\t%.3f\t%.3f\t%.3f' % (label, sen, ppv, mcc, fscore))

        weight = N if weight_by_n_bps else 1
        ptl_sen += sen*weight
        ptl_ppv += ppv*weight
        ptl_mcc += mcc*weight
        ptl_fscore += fscore*weight
        tally += weight

    mean_sen = ptl_sen/tally
    mean_ppv = ptl_ppv/tally
    mean_mcc = ptl_mcc/tally
    mean_fscore = ptl_fscore/tally

    print("Avg:\tsen\tppv\tmcc\tfscore\n\t%.3f\t%.3f\t%.3f\t%.3f" % (mean_sen, mean_ppv, mean_mcc, mean_fscore))

    return mean_sen, mean_ppv, mean_mcc, mean_fscore
165 |
if __name__ == '__main__':
    # Command line entry point: predict MEA structures from base pair
    # probability matrices and, optionally, score them against true structures.

    parser = argparse.ArgumentParser(
        description='''Estimate maximum expected pseudoaccuracy structures per Hamada et al. BMC Bioinf 2010 11:586 and\
        score against a ground truth dataset.\n

        Input format: Base pair probability matrices (specified in --bp_matrices) need to have same base names
        as structures (specified in --true_structs, and can be either dbn strings or NxN matrices),
        but the extensions for both types don't matter.''')

    parser.add_argument('--bp_matrices', '-p', nargs='+',
                        help='path to NxN matrices of bp probabilities, i.e. `contrafold/*.bpps`.')

    parser.add_argument('--output_dir', '-o',
                        help="Path to output of predicted MEA structures. Default is `MEA_output`.", default='MEA_output')

    parser.add_argument('--true_structs', '-s', nargs='+',
                        help='Optional: path to true structures, i.e. `rnaview/*.struct`. These can be dbn structures or NxN matrices.', default=None)

    parser.add_argument('--metric', default='mcc',
                        help='Accuracy metric, options are `mcc`, `fscore`, `ppv`, or `sen`. Default is `mcc`.')

    parser.add_argument('--gamma_min', type=int, default=-7, help='Min value for log_2(gamma), default is -7')
    parser.add_argument('--gamma_max', type=int, default=7, help='Max value for log_2(gamma), default is 7')

    parser.add_argument('--weight_by_n_bps', dest='weight_by_n_bps', action='store_true',
                        help='For scoring to true structures, weight accuracy over dataset by number of bps.\
                        If flag not included, equal weight across constructs.')

    parser.add_argument('--verbose', dest='verbose', action='store_true')
    parser.add_argument('--score_truth_only', dest='score_truth_only', action='store_true',
                        help='Use if MEA structures already generated and only scoring to ground truth dataset.')

    # print help and exit if no args
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()

    # Matrices are only optional when existing predictions are re-scored;
    # without this check a missing --bp_matrices surfaced as a TypeError.
    if not args.score_truth_only and not args.bp_matrices:
        parser.error('--bp_matrices is required unless --score_truth_only is given.')

    #if args.true_structs:
        #assert len(args.bp_matrices) == len(args.true_structs)

    if args.verbose:
        print('\nRNA MEA STRUCTURE PREDICTION')
        if args.bp_matrices:
            print('Number of structures: %d' % len(args.bp_matrices))
            print('Path to first base pair matrix: %s' % args.bp_matrices[0])
        if args.true_structs:
            print('Path to first true struct: %s' % args.true_structs[0])
        print('\nScanning gamma for MEA structure prediction:')

    if not args.score_truth_only:
        predict_MEA_structures(args.bp_matrices, gamma_min=args.gamma_min, gamma_max=args.gamma_max,
                               verbose=args.verbose, metric=args.metric, output_dir=args.output_dir)

    if args.true_structs:
        if args.verbose: print('\nScoring provided true structures against maximum expected pseudoaccuracy structures:')
        # Every file found in the output directory is treated as a prediction.
        score_against_true_structs(glob('%s/*' % args.output_dir), args.true_structs,
                                   verbose=args.verbose, weight_by_n_bps=args.weight_by_n_bps)
223 |
--------------------------------------------------------------------------------
/scripts/write_bpp_matrices.py:
--------------------------------------------------------------------------------
1 | import sys, os, argparse
2 | import arnie.bpps as bpps
3 | from arnie.utils import write_matrix_to_file
4 |
if __name__ == '__main__':
    # Command line entry point: compute a base pair probability matrix for the
    # last line of every given sequence file and write it to <o>/<id>.bpps.
    p = argparse.ArgumentParser(description="""
    Write base pairing probability matrices to files.
    """)

    # NOTE(review): the help text says "dir", but each positional value is
    # opened below as an individual sequence file.
    p.add_argument("seq_dir", nargs='+',
                   help="path to dir of *.seq files")
    p.add_argument("-o", help="name of output dir")
    p.add_argument("-p", "--package", default='vienna_2',
                   help="Package to use")

    if len(sys.argv) == 1:
        p.print_help(sys.stderr)
        sys.exit(1)

    args = p.parse_args()

    # Use the same path for creating the directory and for writing into it.
    # It used to be created as './<o>' but written as '<o>', which point to
    # different places when -o is an absolute path. str() keeps the historical
    # behavior of a directory literally named "None" when -o is omitted.
    out_dir = str(args.o)
    os.makedirs(out_dir, exist_ok=True)

    for seqfile in args.seq_dir:
        print(seqfile)
        # The sequence is taken from the last line of the file.
        with open(seqfile, 'r') as seq_handle:
            seq = seq_handle.readlines()[-1].rstrip()
        seq_id = os.path.basename(seqfile).replace('.seq', '')
        bp_matrix = bpps.bpps(seq, package=args.package)
        with open("%s/%s.bpps" % (out_dir, seq_id), 'w') as f:
            write_matrix_to_file(bp_matrix, f)
33 |
--------------------------------------------------------------------------------
/scripts/write_unpaired_vectors.py:
--------------------------------------------------------------------------------
1 | import sys, os, argparse
2 | import arnie.bpps as bpps
3 | import numpy as np
4 | from arnie.utils import write_vector_to_file
5 |
if __name__ == '__main__':
    # Command line entry point: compute the unpaired probability vector for the
    # last line of every given sequence file and write it to <o>/<id>.unp.
    p = argparse.ArgumentParser(description="""Write unpaired posterior probabilities to files.
    """)

    # NOTE(review): the help text says "dir", but each positional value is
    # opened below as an individual sequence file.
    p.add_argument("seq_dir", nargs='+',
                   help="path to dir of *.seq files")
    p.add_argument("-o", help="name of output dir")
    p.add_argument("-p", "--package", default='vienna_2', help="Package to use")

    if len(sys.argv) == 1:
        p.print_help(sys.stderr)
        sys.exit(1)

    args = p.parse_args()

    # Use the same path for creating the directory and for writing into it.
    # It used to be created as './<o>' but written as '<o>', which point to
    # different places when -o is an absolute path. str() keeps the historical
    # behavior of a directory literally named "None" when -o is omitted.
    out_dir = str(args.o)
    os.makedirs(out_dir, exist_ok=True)

    for seqfile in args.seq_dir:
        print(seqfile)
        # The sequence is taken from the last line of the file.
        with open(seqfile, 'r') as seq_handle:
            seq = seq_handle.readlines()[-1].rstrip()
        seq_id = os.path.basename(seqfile).replace('.seq', '')

        # Unpaired probability = 1 - sum of pairing probabilities per position.
        unp_vector = 1-np.sum(bpps.bpps(seq, package=args.package), axis=0)

        with open("%s/%s.unp" % (out_dir, seq_id), 'w') as f:
            write_vector_to_file(unp_vector, f)
34 |
--------------------------------------------------------------------------------
/src/arnie/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/arnie/bpps.py:
--------------------------------------------------------------------------------
1 | import os, re, sys
2 | import subprocess as sp
3 | import random, string
4 | import numpy as np
5 | from .utils import *
6 | from .pfunc import pfunc
7 |
# Load external package locations once at import time (per the original note,
# from a yaml file). Watch out: this is a module-level (global) dict that the
# functions in this module read directly.
package_locs = load_package_locations()
10 |
def bpps(sequence, package='vienna', constraint=None, pseudo=False,
         T=37, coaxial=True, linear=False, dna=False,
         motif=None, dangles=True,param_file=None,reweight=None, beam_size=100, DEBUG=False, threshknot=False,
         probing_signal=None, probing_kws=None,DIRLOC=None):

    ''' Compute base pairing probability matrix for RNA sequence.

    The package name is lower-cased and split at a single underscore into
    (package, version). nupack and vfold are handled by their own helpers;
    every other package goes through pfunc(..., bpps=True) and the returned
    temporary file is parsed by the matching reader.

    Args:
    sequence (str): nucleic acid sequence
    package (str): package name, optionally with a version suffix (e.g. 'vienna_2')
    T (float): temperature (Celsius)
    linear (bool): call LinearPartition to estimate Z in Vienna or Contrafold
    constraint (str): structure constraint (functional in vienna, contrafold, rnastructure)
    motif (str): argument to vienna motif
    pseudo (bool): (NUPACK only) include pseudoknot calculation
    dangles (bool): dangles or not, specifiable for vienna, nupack
    dna (bool): (NUPACK only) use SantaLucia 1998 parameters for DNA
    coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold
    param_file, reweight, probing_signal, probing_kws: forwarded unchanged to pfunc
    beam_size (int): Beam size for LinearPartition base pair calculation.
    DEBUG (bool): Output command-line calls to packages.
    threshknot (bool): calls threshknot to predict pseudoknots (for contrafold with LinearPartition)
    DIRLOC: accepted for interface compatibility but not used here; pfunc is
        always given package_locs[package].

    Possible packages: 'vienna_2', 'vienna_1','contrafold_1','contrafold_2',
    'nupack_95','nupack_99','rnasoft_2007','rnasoft_1999','rnastructure','vfold_0','vfold_1'

    Returns
    array: NxN matrix of base pair probabilities

    Raises:
    ValueError: motif used with a package other than vienna, or pseudo used
        with a package other than nupack.
    RuntimeError: the package has no base pair probability reader here.
    '''
    package = package.lower()
    try:
        pkg, version = package.split('_')
    except ValueError:
        # No underscore (or more than one): use the whole name as the package.
        pkg, version = package, None

    if motif is not None and pkg != 'vienna':
        raise ValueError('motif option can only be used with Vienna.')

    if pseudo and pkg != 'nupack':
        raise ValueError('pseudoknot option only implemented with Nupack.')

    # Unsupported options only trigger a warning; the calculation still runs.
    if not dangles and pkg not in ['vienna','nupack']:
        print('Warning: %s does not support dangles options' % pkg)
    if not coaxial and pkg not in ['rnastructure','vfold']:
        print('Warning: %s does not support coaxial options' % pkg)
    if linear and pkg not in ['vienna','contrafold','eternafold']:
        print('Warning: LinearPartition only implemented for vienna, contrafold, eternafold.')

    if pkg=='nupack':
        return bpps_nupack_(sequence, version = version, dangles = dangles, T = T, pseudo=pseudo, dna=dna)

    if pkg=='vfold':
        return bpps_vfold_(sequence, version = version, T = T, coaxial = coaxial)

    _, tmp_file = pfunc(sequence, package=package, bpps=True, linear=linear,
        motif=motif, constraint=constraint, T=T, coaxial=coaxial, probing_signal=probing_signal, probing_kws=probing_kws, DIRLOC=package_locs[package],
        dangles=dangles, param_file=param_file,reweight=reweight, beam_size=beam_size, DEBUG=DEBUG, threshknot=threshknot)

    if linear:
        # parse linearpartition output
        return bpps_linearpartition_(sequence, tmp_file)

    if 'contrafold' in pkg or package=='eternafold':
        # eternafold output is parsed with the contrafold reader
        return bpps_contrafold_(sequence, tmp_file)
    elif 'vienna' in pkg:
        return bpps_vienna_(sequence, tmp_file)
    elif 'rnasoft' in pkg:
        return bpps_rnasoft_(sequence, tmp_file)
    elif 'rnastructure' in pkg:
        return bpps_rnastructure_(sequence, tmp_file, coaxial=coaxial)
    else:
        raise RuntimeError('package not yet implemented')
87 |
def bpps_vienna_(sequence, tmp_file):
    ''' Parse 'ubox' lines of a Vienna output file into a symmetric matrix.

    Each usable line has four whitespace-separated fields `i j p ubox` with
    1-based indices; the stored probability is p squared. Lines containing
    'ubox' that do not fit this shape are skipped. The file is deleted after
    reading.

    Args:
    sequence (str): sequence, only its length is used
    tmp_file (str): path of the file to parse (removed afterwards)

    Returns
    array: NxN matrix of base pair probabilities
    '''
    n = len(sequence)
    probs = np.zeros([n, n])

    with open(tmp_file, 'r') as f:
        for line in f:
            if 'ubox' not in line:
                continue
            try:
                i, j, p, _ = line.split()
                i, j, p = int(i)-1, int(j)-1, float(p)**2
                probs[i,j] = p
                probs[j,i] = p
            except (ValueError, IndexError):
                # Best-effort parsing: skip lines that mention 'ubox' but are
                # not pair records (wrong field count, non-numeric fields, or
                # indices outside the matrix). Other errors propagate.
                continue

    os.remove(tmp_file)
    return probs
105 |
def bpps_contrafold_(sequence, tmp_file):
    ''' Parse a contrafold-style posterior file into a symmetric matrix.

    Only lines containing ':' are used. Their first field is the 1-based
    index of a base, the second field is ignored, and every further field is
    `partner:probability` with a 1-based partner index. The file is deleted
    after reading.

    Args:
    sequence (str): sequence, only its length is used
    tmp_file (str): path of the file to parse (removed afterwards)

    Returns
    array: NxN matrix of base pair probabilities
    '''
    n = len(sequence)
    probs = np.zeros([n, n])

    # Context manager so the handle is closed before the file is removed.
    with open(tmp_file) as f:
        for line in f:
            if ':' not in line:
                continue
            fields = line.split()
            first_ind = int(fields[0])-1
            for entry in fields[2:]:
                pieces = entry.split(':')
                second_ind = int(pieces[0])-1
                p = float(pieces[1])
                probs[first_ind, second_ind] = p
                probs[second_ind, first_ind] = p

    os.remove(tmp_file)

    return probs
124 |
def bpps_rnasoft_(sequence, tmp_file):
    ''' Parse an rnasoft pair probability file into a symmetric matrix.

    Every non-blank line provides `i j p` as its first three fields. The
    indices are used as given, without any 1-based to 0-based shift. The file
    is deleted after reading.

    Args:
    sequence (str): sequence, only its length is used
    tmp_file (str): path of the file to parse (removed afterwards)

    Returns
    array: NxN matrix of base pair probabilities
    '''
    n = len(sequence)
    probs = np.zeros([n, n])

    # Context manager so the handle is closed before the file is removed.
    with open(tmp_file) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # a blank line carries no record
                continue
            i, j, p = int(fields[0]), int(fields[1]), float(fields[2])
            probs[i,j] = p
            probs[j,i] = p

    os.remove(tmp_file)

    return probs
137 |
def bpps_nupack_(sequence, version='95', T=37, dangles=True, pseudo=False,dna=False):
    ''' Run the NUPACK `pairs` executable and parse its .ppairs output.

    Args:
    sequence (str): nucleic acid sequence
    version (str): '95' or '99', selects material rna1995 / rna1999
        (a falsy value falls back to '95')
    T (float): temperature passed to -T
    dangles (bool): True -> '-dangles some', False -> '-dangles none'
    pseudo (bool): append '--pseudo' to the command
    dna (bool): use material 'dna1998' instead of the RNA material

    Returns
    array: NxN matrix of base pair probabilities

    Raises:
    KeyError: unknown version (when dna is False)
    Exception: the `pairs` executable returned a non-zero exit code
    '''
    if not version: version='95'

    nupack_materials={'95': 'rna1995', '99': 'rna1999'}

    material = 'dna1998' if dna else nupack_materials[version]

    DIR = package_locs['nupack']

    dangle_option = 'some' if dangles else 'none'

    seqfile = write([sequence])
    # `pairs` is given the input path without its '.in' suffix
    prefix = seqfile.replace('.in','')

    command=['%s/pairs' % DIR, '%s' % prefix,
             '-T', str(T), '-material', material, '-dangles', dangle_option, '-cutoff', '0.0000000001']

    if pseudo:
        command.append('--pseudo')

    try:
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        stdout, stderr = p.communicate()
    finally:
        # The input file is no longer needed, whether or not the run worked;
        # it used to be left behind on failure.
        os.remove(seqfile)

    if p.returncode:
        raise Exception('Nupack pfunc failed: on %s\n%s' % (sequence, stderr))

    ppairs_file = '%s.ppairs' % prefix

    probs=np.zeros([len(sequence), len(sequence)])

    try:
        with open(ppairs_file, 'r') as f:
            for line in f:
                if line.startswith('%'):
                    continue
                fields = line.split()
                # Skip short lines and records whose second index lies beyond
                # the sequence length.
                if len(fields) > 1 and int(fields[1]) <= len(sequence):
                    i, j, p = int(fields[0])-1, int(fields[1])-1, float(fields[2])
                    probs[i,j] = p
                    probs[j,i] = p
    finally:
        os.remove(ppairs_file)

    return probs
187 |
def bpps_rnastructure_(sequence, tmp_file, coaxial=True, DEBUG=False):
    ''' Convert an RNAstructure .pfs file to a base pair probability matrix.

    Runs `ProbabilityPlot <pfs> <out> -t -min 0.0000000001`, then reads the
    output file: the first two lines are skipped and every remaining line
    provides `i j x` with 1-based indices, stored as probability 10**(-x).
    Both the .pfs file and the output file are deleted on success.

    Args:
    sequence (str): sequence, its length sets the matrix size
    tmp_file (str): path of the .pfs file
    coaxial (bool): accepted for interface compatibility; not used here
    DEBUG (bool): print the command and its stdout/stderr

    Returns
    array: NxN matrix of base pair probabilities

    Raises:
    Exception: ProbabilityPlot returned a non-zero exit code
    '''
    DIR = package_locs['rnastructure']

    pfsfile = tmp_file #'%s/rnastructtmp.pfs' % package_locs['TMP']
    outfile = '%s.probs' % (tmp_file.replace('.pfs',''))
    command = ['%s/ProbabilityPlot' % DIR, pfsfile, outfile, '-t', '-min', '0.0000000001']

    probs=np.zeros([len(sequence), len(sequence)])

    if DEBUG: print(' '.join(command))
    p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)

    stdout, stderr = p.communicate()

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)

    if p.returncode:
        # Report the actual sequence; this used to reference an undefined
        # name `seq`, which turned every failure into a NameError.
        raise Exception('RNAstructure ProbabilityPlot failed: on %s\n%s' % (sequence, stderr))

    with open(outfile, 'r') as f:
        for line in f.readlines()[2:]:
            fields = line.split()
            i, j, p = int(fields[0])-1, int(fields[1])-1, 10**(-1*float(fields[2]))
            probs[i,j] = p
            probs[j,i] = p

    os.remove(outfile)
    os.remove(pfsfile)
    return probs
222 |
def bpps_vfold_(sequence, version='0',T=37, coaxial=True, DEBUG=False):
    ''' Run Vfold2d_npk and parse its .pij output into a symmetric matrix.

    The binary is started from inside the vfold directory; the previous
    working directory is always restored afterwards.

    Args:
    sequence (str): nucleic acid sequence
    version (str): available versions: 0 for Turner 04 params, 1 for Mfold 2.3 params
    T (float): temperature, formatted with %d on the command line
    coaxial (bool): passed to the binary as 0/1
    DEBUG (bool): print working directory, command and stdout/stderr

    Returns
    array: NxN matrix of base pair probabilities

    Raises:
    RuntimeError: no Vfold binary for the current platform
    Exception: the binary returned a non-zero exit code
    '''
    DIR = package_locs["vfold"]

    # Resolve the platform first so that an unsupported platform fails before
    # any temporary file is written.
    if sys.platform=="linux":
        platform='linux'
    elif sys.platform=="darwin":
        platform='mac'
    elif sys.platform=="win32":
        platform='win'
    else:
        raise RuntimeError('Vfold has binaries for linux, macOS, and win')

    cwd = os.getcwd()
    os.chdir(DIR) #vfold precompiled binaries don't work being called from elsewhere
    try:
        if DEBUG: print(os.getcwd())

        seqfile = write([sequence])

        outfile = filename()+'.pij'

        command = ['./Vfold2d_npk_%s.o %d %d %s %s %d' % (platform, int(coaxial), T, seqfile, outfile, int(version))]

        if DEBUG: print(' '.join(command))

        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)

        stdout, stderr = p.communicate()
    finally:
        # Restore the caller's working directory even if something above raised.
        os.chdir(cwd)

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)

    # The input file is no longer needed, whether or not the run worked.
    os.remove(seqfile)

    if p.returncode:
        raise Exception('Vfold2d_npk failed: on %s\n%s' % (sequence, stderr))

    probs = np.zeros([len(sequence),len(sequence)])
    # Selected columns: index 1, index 2, bpp. reshape(-1, 3) keeps a file
    # with a single row (which loadtxt returns as 1-D) or no rows iterable as
    # rows of three values.
    p_ij_output = np.loadtxt(outfile,usecols=(0,2,3)).reshape(-1, 3)

    for i,j,p in p_ij_output:
        probs[int(i-1),int(j-1)] = p
        probs[int(j-1),int(i-1)] = p
    os.remove(outfile)

    return probs
273 | #output: take second field of last line for Z
274 |
275 |
def bpps_linearpartition_(sequence, tmp_file):
    """Read a base-pair probability file into a symmetric matrix, then delete the file.

    Each non-blank line of ``tmp_file`` must hold three whitespace-separated
    fields: two 1-based indices and a probability.

    Args:
        sequence (str): sequence the probabilities refer to; only its length
            is used, to size the matrix.
        tmp_file (str): path of the file to parse. It is removed after a
            successful parse.

    Returns:
        numpy.ndarray: ``len(sequence) x len(sequence)`` symmetric matrix.

    Raises:
        ValueError: if a non-blank line does not have exactly three fields or
            a field cannot be converted to a number.
    """
    n = len(sequence)
    probs = np.zeros([n, n])

    # Context manager guarantees the handle is closed before os.remove.
    with open(tmp_file, 'r') as handle:
        for line in handle:
            # split() without an argument tolerates tabs and repeated spaces
            fields = line.split()
            if not fields:
                continue
            first_ind, second_ind, p = fields
            first_ind = int(first_ind) - 1
            second_ind = int(second_ind) - 1
            p = float(p)
            probs[first_ind, second_ind] = p
            probs[second_ind, first_ind] = p

    os.remove(tmp_file)

    return probs
294 |
--------------------------------------------------------------------------------
/src/arnie/free_energy.py:
--------------------------------------------------------------------------------
1 | import os, re, sys
2 | import subprocess as sp
3 | import random, string
4 | import numpy as np
5 | from .utils import *
6 | from .pfunc import pfunc
7 |
8 | DEBUG=False
9 |
10 | # Load package locations from the yaml file. NOTE: this is a module-level (global) dict.
11 | package_locs = load_package_locations()
12 |
def free_energy(seq, constraint=None, package='vienna_2', T=37, coaxial=True, dna=False, beam_size=100,
    pseudo=False, dangles=True, reweight=None, ensemble=True, param_file=None, linear=False,DEBUG=False):
    ''' Compute free energy of RNA sequence. If structure is given, computes free energy of that structure.

    This is a thin wrapper that forwards every argument to `pfunc` with
    `return_free_energy=True`.

    Args:
    seq (str): nucleic acid sequence
    constraint (str, optional): possible structure to constrain to in dot bracket notation
    package (str): name of the folding package, forwarded to `pfunc`
    T (float): temperature (Celsius), default 37

    ensemble (bool): to compute ensemble of constraint string or not.
      If False, converts '.' to 'x' in the constraint string.
      If you want the free energy of just one structure,
      better practice is to use 'x' to denote unpaired.
      Has no effect when no constraint is given.

    beam_size (int): beam size for use in LinearPartition (Vienna, CONTRAfold, EternaFold only)
    dangles (bool): dangles or not, specifiable for vienna, nupack
    dna (bool): use SantaLucia model for DNA (NUPACK only)
    coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold
    pseudo (bool): include pseudoknot (nupack only)
    reweight, param_file, linear, DEBUG: forwarded unchanged to `pfunc`
    Implemented packages:
    'vienna_1', 'vienna_2', 'contrafold'

    NB: doesn't multiply by kT for contrafold...

    Returns
    free energy (float)
    '''
    # Guard against constraint=None: previously ensemble=False without a
    # constraint raised AttributeError on None.replace.
    if not ensemble and constraint is not None:
        constraint = constraint.replace('.','x')

    return pfunc(seq, package=package, T=T, dangles=dangles, coaxial=coaxial, pseudo=pseudo, dna=dna, beam_size = beam_size,
        constraint=constraint, reweight=reweight, param_file=param_file, return_free_energy=True, linear=linear, DEBUG=DEBUG)
49 |
50 | # if package.lower().startswith('contrafold'):
51 | # Z_constrained = pfunc(seq, package=package, T=T, dangles=dangles, constraint=constraint,param_file=param_file)
52 |
53 | # return -1* np.log(Z_constrained) # .00198 is k in kcal/mol #0.0019899*(273+T) *
54 | # else:
55 | # raise RuntimeError("%s `free_energy` not implemented yet" % package)
56 |
--------------------------------------------------------------------------------
/src/arnie/mea/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/src/arnie/mea/__init__.py
--------------------------------------------------------------------------------
/src/arnie/mea/mea.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import argparse, sys
3 | from arnie.mea.mea_utils import *
4 | from copy import copy
5 |
class MEA:
    """Predict a structure from a base-pair probability matrix.

    Construction immediately runs either the maximum-expected-accuracy dynamic
    programme (`run_MEA`) or the ProbKnot-style heuristic (`run_ProbKnot`).

    Args:
        bpps (numpy.ndarray): N x N base-pair probability matrix.
        gamma (float): weight on pairing in the MEA recursion; a pair
            contributes ``(gamma + 1) * p_ij - 1``.
        debug (bool): print each traceback step.
        run_probknot_heuristic (bool): use `run_ProbKnot` instead of `run_MEA`.
        theta (float): ProbKnot only; probabilities ``<= theta`` are discarded.
        stochastic (bool): MEA only; sample the recursion option at random
            instead of taking the maximum.

    Attributes:
        structure (str): predicted structure in dot-bracket notation.
        MEA_bp_list (list): predicted base pairs as 0-based index pairs.
        MEA_bp_matrix (numpy.ndarray): N x N 0/1 matrix of predicted pairs.
        W, tb (numpy.ndarray): score and traceback tables of the MEA recursion.
        evaluated (bool): True once a prediction has been made.
    """

    def __init__(self, bpps, gamma = 1.0, debug=False, run_probknot_heuristic = False, theta=0, stochastic=False):
        self.debug = debug
        self.bpps = bpps
        self.N=self.bpps.shape[0]
        self.gamma = gamma
        self.theta = theta
        self.W = np.zeros([self.N,self.N])
        self.MEA_bp_list = []
        self.structure = ['.']*self.N
        self.MEA_bp_matrix = np.zeros([self.N, self.N])
        self.tb = np.zeros([self.N, self.N])
        self.min_hp_length = 3
        self.evaluated = False
        self.stochastic = stochastic
        # Remembered so that later re-evaluation uses the same algorithm.
        self.run_probknot_heuristic = run_probknot_heuristic

        self._evaluate()

    def _evaluate(self):
        """Run the prediction algorithm selected at construction time."""
        if getattr(self, 'run_probknot_heuristic', False):
            self.run_ProbKnot()
        else:
            self.run_MEA()

    def _reset_prediction(self):
        """Clear any previous prediction so the algorithms can be re-run safely."""
        self.MEA_bp_list = []
        self.structure = ['.']*self.N
        self.MEA_bp_matrix = np.zeros([self.N, self.N])

    def fill_W(self, i, j):
        """Fill score table entry W[i, j] and its traceback code tb[i, j].

        Traceback codes: 0: 5' pass, 1: 3' pass, 2: bp, 3: multiloop.
        Requires ``j > i + 1`` so that at least one split point k exists.
        """
        options = [self.W[i+1, j], self.W[i, j-1],
                   (self.gamma+1)*self.bpps[i,j] + self.W[i+1, j-1] - 1,
                   np.max([self.W[i,k] + self.W[k+1, j] for k in range(i+1,j)])]

        if self.stochastic:
            # Sample an option with weight proportional to its margin over the
            # worst option.
            option_wts = np.asarray(options, dtype=float) - np.min(options)
            total = np.sum(option_wts)
            if total > 0:
                option_wts = option_wts / total
            else:
                # All options tie: the margins are all zero, so fall back to a
                # uniform choice instead of dividing by zero.
                option_wts = np.full(len(options), 1.0/len(options))
            selection = np.random.choice([0,1,2,3],p=option_wts)
        else:
            selection = np.argmax(options)

        self.W[i,j] = options[selection]
        self.tb[i,j] = selection

    def run_MEA(self):
        """Fill the score table, trace back, and store the nested prediction."""
        self._reset_prediction()

        # fill weight matrix, shortest spans first
        for length in range(self.min_hp_length, self.N):
            for i in range(self.N-length):
                self.fill_W(i, i + length)

        self.traceback(0,self.N-1)

        for i, j in self.MEA_bp_list:
            self.MEA_bp_matrix[i,j]=1
            self.structure[i]='('
            self.structure[j]=')'

        self.structure = ''.join(self.structure)
        self.evaluated = True

    def run_ProbKnot(self):
        """Predict pairs (i, j) whose probability is the maximum for both i and j.

        Probabilities ``<= theta`` are zeroed first, and zero entries are never
        selected, so positions with no surviving probability stay unpaired.
        The resulting pair list may be pseudoknotted.
        """
        self._reset_prediction()

        #Threshknot step: filter out bps below cutoff theta
        filtered_bpps = np.array(self.bpps, dtype=float)
        filtered_bpps[filtered_bpps <= self.theta] = 0

        output = np.zeros([self.N, self.N])

        # ProbKnot heuristic part 1: get all base pairs where p(ij) == p_max(i)
        column_max = np.max(filtered_bpps, axis=0)
        output[(filtered_bpps == column_max) & (filtered_bpps > 0)] = 1

        # ProbKnot heuristic part 2: get all base pairs where p(ij) == p_max(j)
        self.MEA_bp_matrix = np.clip(output+np.transpose(output)-1,0,1)

        # MEA_bp_matrix is symmetric, so the upper triangle lists each pair
        # once; k=2 skips the diagonal and directly adjacent positions.
        for i, j in np.argwhere(np.triu(self.MEA_bp_matrix, k=2) == 1):
            self.MEA_bp_list.append([int(i), int(j)])

        self.structure = convert_bp_list_to_dotbracket(self.MEA_bp_list,len(self.bpps))

        self.evaluated = True

    def traceback(self, i, j):
        """Recursively follow tb from span (i, j), appending pairs to MEA_bp_list."""
        if j <= i:
            return
        elif self.tb[i,j] == 0: #5' neighbor
            if self.debug: print(i,j, "5'")
            self.traceback(i+1,j)
        elif self.tb[i,j] == 1: #3' neighbor
            if self.debug: print(i,j, "3'")
            self.traceback(i,j-1)
        elif self.tb[i,j] == 2: # base pair
            if self.debug: print(i,j,'bp')
            self.MEA_bp_list.append((i,j))
            self.traceback(i+1,j-1)
        else: #multiloop
            # take the first split point that reproduces the stored score
            for k in range(i+1,j):
                if self.W[i,j] == self.W[i, k] + self.W[k+1,j]:
                    if self.debug: print(i,j,"multiloop, k=",k)
                    self.traceback(i,k)
                    self.traceback(k+1,j)
                    break

    def score_expected(self):
        '''Compute expected values of TP, FP, etc from predicted MEA structure.

        Returns:
        pseudoexpected SEN, PPV, MCC, F-score'''

        if not self.evaluated:
            self._evaluate()

        pred_m = self.MEA_bp_matrix[np.triu_indices(self.N)]
        probs = self.bpps[np.triu_indices(self.N)]

        # 1e-6 keeps the ratios below finite when a count is zero.
        TP = np.sum(np.multiply(pred_m, probs)) + 1e-6
        # NOTE(review): evaluates as 0.5*N*N - 1; confirm this is the intended pair count.
        TN = 0.5*self.N*self.N-1 - np.sum(pred_m) - np.sum(probs) + TP + 1e-6
        FP = np.sum(np.multiply(pred_m, 1-probs)) + 1e-6
        FN = np.sum(np.multiply(1-pred_m, probs)) + 1e-6

        # compatible false positives are not estimated here; constant offset only
        cFP = 1e-6

        sen = TP/(TP + FN)
        ppv = TP/(TP + FP - cFP)
        mcc = (TP*TN - (FP - cFP)*FN)/np.sqrt((TP + FP - cFP)*(TP + FN)*(TN + FP - cFP)*(TN + FN))
        fscore = 2*TP/(2*TP + FP - cFP + FN)

        return [sen, ppv, mcc, fscore]

    def score_ground_truth(self, ground_truth_struct, allow_pseudoknots=False):
        """Score the prediction against a reference structure.

        Args:
            ground_truth_struct: dot-bracket string, or an N x N base-pair
                matrix. It is treated as dot-bracket when its first element
                has length 1.
            allow_pseudoknots (bool): currently unused.

        Returns:
            list: [sensitivity, PPV, MCC, F-score].
        """
        if len(ground_truth_struct[0])==1:
            gt_matrix = convert_dotbracket_to_matrix(ground_truth_struct)
        else:
            gt_matrix = ground_truth_struct

        if not self.evaluated:
            self._evaluate()
        # Resolves to the module-level score_ground_truth function, not this method.
        sen, ppv, mcc, fscore, _ = score_ground_truth(self.MEA_bp_matrix, gt_matrix)
        return [sen, ppv, mcc, fscore]
152 |
--------------------------------------------------------------------------------
/src/arnie/mea/mea_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import argparse, sys
3 |
def convert_dotbracket_to_matrix(s):
    """Convert a dot-bracket structure into an N x N base-pair matrix.

    The bracket families (), [], {} and <> are matched independently. Each
    pair (i, j) with i < j sets entry [i, j] to 1.0, so only the upper
    triangle is filled. Unmatched brackets are ignored.
    """
    size = len(s)
    matrix = np.zeros([size, size])
    for opener, closer in ('()', '[]', '{}', '<>'):
        unmatched = []  # positions of openers still waiting for a partner
        for position, symbol in enumerate(s):
            if symbol == opener:
                unmatched.append(position)
            elif symbol == closer and unmatched:
                # a closer pairs with the most recent unmatched opener
                matrix[unmatched.pop(), position] = 1.0
    return matrix
21 |
22 |
23 |
def convert_matrix_to_dotbracket(m):
    """Convert a base-pair matrix to dot-bracket notation via its base-pair list."""
    return convert_bp_list_to_dotbracket(convert_matrix_to_bp_list(m), len(m))
27 |
def convert_matrix_to_bp_list(m):
    """Convert a base-pair adjacency matrix into a list of [i, j] pairs.

    Only entries strictly above the diagonal are read, so every pair has
    i < j. Pairs are returned in row-major order.
    """
    pairs = []
    for five_prime, row in enumerate(m):
        for three_prime in range(five_prime + 1, len(row)):
            if row[three_prime]:
                pairs.append([five_prime, three_prime])
    return pairs
35 |
36 |
def convert_bp_list_to_dotbracket(bp_list,seq_len):
    """Render a base-pair list as a dot-bracket string of length seq_len.

    Base pairs are first split into groups by group_into_non_conflicting_bp_.
    The first two groups are written with (), the following ones with [], {}
    and <>. If there are more groups than bracket types a warning is printed
    and the extra groups are not drawn.
    """
    # one (open, close) pair per group, in group order
    bracket_pairs = [("(",")"),("(",")"),("[","]"),("{","}"),("<",">")]

    groups = group_into_non_conflicting_bp_(bp_list)
    if len(groups) > len(bracket_pairs):
        print("WARNING: PK too complex, not enough brackets to represent it.")

    dotbracket = "."*seq_len
    for group, (open_char, close_char) in zip(groups, bracket_pairs):
        for bp in group:
            left, right = bp[0], bp[1]
            # splice the two bracket characters into positions left and right
            dotbracket = (dotbracket[:left] + open_char
                          + dotbracket[left+1:right] + close_char
                          + dotbracket[right+1:])
    return dotbracket
52 |
53 |
def load_matrix_or_dbn(s):
    """Load a structure file as a base-pair matrix.

    Heuristic: a file with more than two lines is read with ``np.loadtxt`` as
    a square base-pair matrix; otherwise its contents are parsed as a
    dot-bracket string.

    Args:
        s (str): path of the file to read.

    Returns:
        numpy.ndarray: N x N base-pair matrix.

    Raises:
        AssertionError: if a matrix file is not square.
        ValueError: if a dot-bracket file cannot be parsed.
    """
    # Context managers close the file handles deterministically.
    with open(s) as handle:
        num_lines = sum(1 for _ in handle)

    if num_lines > 2: #heuristic here
        struct = np.loadtxt(s) # load as base pair matrix
        assert struct.shape[0] == struct.shape[1]
    else:
        try: # load as dot-bracket string
            with open(s,'r') as handle:
                dbn_struct = handle.read().rstrip()

            struct = convert_dotbracket_to_matrix(dbn_struct)
        except Exception as err:
            # `except Exception` lets KeyboardInterrupt/SystemExit propagate
            raise ValueError('Unable to parse structure %s' % s) from err
    return struct
69 |
def score_ground_truth(pred_matrix, true_matrix):
    '''Score a predicted structure against a true structure,
    input as NxN base pair matrix (takes top triangle).

    Entries of the upper triangle (diagonal included) are classified as:
      TP: true == 1 and pred == 1        FN: true == 1 and pred != 1
      TN: true == 0 and pred == 0        FP: true == 0 and pred != 0
    A false positive (i, j) is "compatible" (cFP) when the column sums of
    true_matrix at i and at j are both zero; compatible false positives are
    not counted against PPV / MCC.

    Returns:
        (sen, ppv, mcc, fscore, N)
    '''

    N = pred_matrix.shape[0]
    assert pred_matrix.shape[1] == N
    assert true_matrix.shape[0] == N
    assert true_matrix.shape[1] == N

    # Computed once; these were previously recomputed for every false positive.
    rows, cols = np.triu_indices(N)
    true = true_matrix[rows, cols]
    pred = pred_matrix[rows, cols]

    # NOTE(review): column sums only see partners stored in the same column;
    # for an upper-triangular true_matrix this may miss 5' partners -- confirm.
    column_sums = np.sum(true_matrix, axis=0)
    both_unpaired = (column_sums[rows] + column_sums[cols]) == 0

    true_paired = true == 1
    true_unpaired = true == 0
    pred_paired = pred == 1
    false_positive = true_unpaired & ~(pred == 0)

    # int() keeps plain Python integers, as the counting loop produced.
    TP = int(np.sum(true_paired & pred_paired))
    FN = int(np.sum(true_paired & ~pred_paired))
    TN = int(np.sum(true_unpaired & (pred == 0)))
    FP = int(np.sum(false_positive))
    cFP = int(np.sum(false_positive & both_unpaired))

    if TP + FN == 0:
        sen = 1
    else:
        sen = TP/(TP + FN)

    if TP + FP - cFP == 0:
        ppv = 1
    else:
        ppv = TP/(TP + FP - cFP)

    mcc_num = (TP*TN - (FP - cFP)*FN)
    mcc_denom = np.sqrt((TP + FP - cFP)*(TP + FN)*(TN + FP - cFP)*(TN + FN))

    if mcc_denom == 0:
        mcc = mcc_num
    else:
        mcc = mcc_num/mcc_denom

    if ppv + sen == 0:
        fscore = 0
    else:
        fscore = 2*ppv*sen/(ppv+sen)

    return sen, ppv, mcc, fscore, N
129 |
130 |
def group_into_non_conflicting_bp_(bp_list):
    ''' Split a base-pair list into groups, using the conflicts reported by get_list_bp_conflicts_.

    Args
    bp_list: list of base pairs

    Returns:
    list of groups of base pairs; the first group holds every base pair that
    is not in the list returned by get_non_redudant_bp_list_, and each later
    group is built around one base pair and excludes the pairs it conflicts with
    '''
    conflicts = get_list_bp_conflicts_(bp_list)
    pending = get_non_redudant_bp_list_(conflicts)

    # first group: base pairs that take part in no conflict
    groups = [[bp for bp in bp_list if bp not in pending]]

    while pending != []:
        pivot = pending[0]
        # partners of every conflict that involves the pivot, in conflict order
        pivot_conflicts = [
            pair[1] if pivot == pair[0] else pair[0]
            for pair in conflicts
            if pivot == pair[0] or pivot == pair[1]
        ]
        group = [bp for bp in pending if bp not in pivot_conflicts]
        groups.append(group)

        # continue with the pairs that clashed with the pivot, and drop every
        # conflict that involves a base pair just placed in a group
        pending = pivot_conflicts
        conflicts = [
            pair for pair in conflicts
            if pair[0] not in group and pair[1] not in group
        ]
    return groups
158 |
159 |
160 | def get_list_bp_conflicts_(bp_list):
161 | '''given a bp_list gives the list of conflicts bp-s which indicate PK structure
162 | Args:
163 | bp_list: of list of base pairs where the base pairs are list of indeces of the bp in increasing order (bp[0]