├── .github └── workflows │ └── create-release.yml ├── .gitignore ├── .versipy └── README.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MetaCompore ├── Snakefile ├── environment.yaml ├── example_Run_snakemake.sh ├── example_config.yaml ├── example_lsf.yaml ├── example_samples.tsv ├── resources │ ├── f5c │ │ └── r9.4_70bps.u_to_t_rna.5mer.template.model │ └── mines │ │ ├── AGACT_random_forest_model.pickle │ │ ├── GGACA_random_forest_model.pickle │ │ ├── GGACC_random_forest_model.pickle │ │ ├── GGACT_random_forest_model.pickle │ │ └── names.txt └── workflow │ ├── rules │ ├── alignment.smk │ ├── basecalling.smk │ ├── common.smk │ ├── differr.smk │ ├── eligos2.smk │ ├── epinano.smk │ ├── input.smk │ ├── mines.smk │ ├── nanocompore.smk │ ├── quality_control.smk │ ├── resquiggling.smk │ └── tombo.smk │ ├── schemas │ ├── config.schema.yaml │ └── samples.schema.yaml │ └── scripts │ ├── differr_compare.py │ ├── differr_postprocess.py │ ├── eligos2_pair_diff_mod.py │ ├── eligos2_postprocess.py │ ├── epinano_filter_kmers.py │ ├── get_transcriptome.py │ ├── min_ref_coverage.py │ ├── mines_postprocess.py │ ├── nanocompore_postprocess.py │ ├── nanocompore_sampcomp.py │ ├── tombo_postprocess.py │ └── tombo_preprocess.py ├── README.md ├── versipy.yaml └── versipy_history.txt /.github/workflows/create-release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - '*' 5 | 6 | name: Upload Release Asset 7 | 8 | jobs: 9 | build: 10 | name: Upload Release Asset 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v2 15 | - name: Build project 16 | run: | 17 | tar czf MetaCompore.tar.gz MetaCompore/ 18 | - name: Create Release 19 | id: create_release 20 | uses: actions/create-release@v1 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | with: 24 | tag_name: ${{ github.ref }} 25 | release_name: Release ${{ github.ref }} 26 | draft: false 27 | prerelease: 
false 28 | - name: Upload Release Asset 29 | id: upload-release-asset 30 | uses: actions/upload-release-asset@v1 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | with: 34 | upload_url: ${{ steps.create_release.outputs.upload_url }} 35 | asset_path: MetaCompore.tar.gz 36 | asset_name: MetaCompore.tar.gz 37 | asset_content_type: application/zip 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Snakemake 132 | .snakemake 133 | data 134 | logs 135 | results 136 | .panoptes.db 137 | *.ipynb 138 | .config/ 139 | -------------------------------------------------------------------------------- /.versipy/README.md: -------------------------------------------------------------------------------- 1 | # __package_name__ v__package_version__ 2 | 3 | [![Snakemake](https://img.shields.io/badge/snakemake-≥5.30.1-brightgreen.svg)](https://snakemake.bitbucket.io) 4 | [![DOI](https://zenodo.org/badge/312304999.svg)](https://zenodo.org/badge/latestdoi/312304999) 5 | 6 | 7 | --- 8 | 9 | **__package_description__** 10 | 11 | At the moment MetaCompore supports the following tools: 12 | * NanoCompore v1.03: https://github.com/tleonardi/nanocompore/ 13 | * Epinano v1.02: https://github.com/enovoa/EpiNano 14 | * Eligos2 v2.0.0: https://gitlab.com/piroonj/eligos2 15 | * Tombo v1.5.1: https://github.com/nanoporetech/tombo 16 | * MINES: https://github.com/YeoLab/MINES 17 | * differr_nanopore_DRS: https://github.com/bartongroup/differr_nanopore_DRS 18 | 19 | ## Authors 20 | 21 | * Adrien Leger (@a-slide) 22 | * Tommaso Leonardi (@tleonardi) 23 | 24 | ## Usage 25 | 26 | ### Step 1: Obtain a copy of this workflow 27 | 28 | Download the latest tarball archive of the pipeline to your local system, into the location where you want to perform the data analysis 29 | 30 | ``` 31 | wget 
https://github.com/a-slide/MetaCompore/releases/download/__package_version__/MetaCompore.tar.gz 32 | tar xzf MetaCompore.tar.gz 33 | cd MetaCompore 34 | ``` 35 | 36 | ### Step 2: Install dependencies 37 | 38 | #### Singularity 39 | 40 | If required, install singularity following the official documentation: https://sylabs.io/guides/3.7/user-guide/quick_start.html 41 | 42 | #### Conda / Mamba 43 | 44 | Install miniconda following the official documentation: https://docs.conda.io/en/latest/miniconda.html 45 | 46 | You can also install mamba to speed up snakemake installation: https://github.com/mamba-org/mamba 47 | 48 | #### Snakemake 49 | 50 | Create a virtual environment containing snakemake with [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) 51 | ``` 52 | conda env create -f environment.yaml 53 | ``` 54 | 55 | You can also use [mamba](https://github.com/mamba-org/mamba) which will give you the same result, but much faster 56 | 57 | ``` 58 | mamba env create -f environment.yaml 59 | ``` 60 | 61 | ### Step 3: Configure the workflow 62 | 63 | Configure the workflow execution according to your needs by editing the file `config.yaml` 64 | 65 | ``` 66 | nano config.yaml 67 | ``` 68 | 69 | Edit the `samples.tsv` to specify your sample setup and fast5 source files 70 | 71 | ``` 72 | nano samples.tsv 73 | ``` 74 | 75 | ### Step 4: Execute workflow 76 | 77 | #### Local mode 78 | 79 | Activate the conda environment and run the workflow: 80 | 81 | ``` 82 | conda activate MetaCompore 83 | snakemake --use-singularity -j 4 84 | ``` 85 | 86 | #### LSF cluster mode 87 | 88 | Set an LSF cluster profile https://github.com/Snakemake-Profiles/lsf 89 | 90 | Edit the LSF rule-specific config file `lsf.yaml` 91 | 92 | 93 | ## Disclaimer 94 | 95 | Please be aware that __package_name__ is a research package that is still under development. 96 | 97 | It was tested under Linux Ubuntu 16.04 and in an HPC environment running under Red Hat Enterprise 7.1. 
98 | 99 | Thank you 100 | 101 | ## Citation 102 | 103 | __citation__ 104 | 105 | ## Licence 106 | 107 | __package_licence__ (__package_licence_url__) 108 | 109 | Copyright © 2020 __author_name__ 110 | 111 | ## Authors 112 | 113 | * __author_name__ / __author_email__ / __author_url__ 114 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at aleg@ebi.ac.uk. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to MetaCompore 2 | 3 | First of all, thanks for considering contributing to `MetaCompore`! 👍 It's people like you that make it rewarding for us to work on `MetaCompore`. 4 | 5 | `MetaCompore` is an open source project, maintained by publicly funded academic researchers. 
6 | 7 | [repo]: https://github.com/a-slide/MetaCompore 8 | [issues]: https://github.com/a-slide/MetaCompore/issues 9 | [new_issue]: https://github.com/a-slide/MetaCompore/issues/new 10 | [citation]: https://zenodo.org/badge/latestdoi/312304999 11 | [email]: mailto:aleg@ebi.ac.uk 12 | [code_of_conduct]: https://github.com/a-slide/MetaCompore/blob/master/CODE_OF_CONDUCT.md 13 | 14 | ## Code of conduct 15 | 16 | Please note that this project is released with a [Contributor Code of Conduct][code_of_conduct]. By participating in this project you agree to abide by its terms. 17 | 18 | ## How you can contribute 19 | 20 | There are several ways you can contribute to this project. If you want to know more about why and how to contribute to open source projects like this one, see this [Open Source Guide](https://opensource.guide/how-to-contribute/). 21 | 22 | ### Share the love ❤️ 23 | 24 | Think `MetaCompore` is useful? Let others discover it, by telling them in person, via Twitter or a blog post. 25 | 26 | Using `MetaCompore` for a paper you are writing? Please cite it using this [DOI][citation] 27 | 28 | ### Ask a question ⁉️ 29 | 30 | Using `MetaCompore` and got stuck? Browse the [documentation][repo] to see if you can find a solution. 31 | 32 | Still stuck? Post your question as an [issue on GitHub][new_issue]. 33 | While we cannot offer user support, we'll try to do our best to address it, as questions often lead to better documentation or the discovery of bugs. 34 | 35 | Want to ask a question in private? Contact the package maintainer by [email][email]. 36 | 37 | ### Propose an idea 💡 38 | 39 | Have an idea for a new `MetaCompore` feature? Take a look at the [issue list][issues] to see if it isn't included or suggested yet. If not, suggest your idea as an [issue on GitHub][new_issue]. While we can't promise to implement your idea, it helps to: 40 | 41 | * Explain in detail how it would work. 42 | * Keep the scope as narrow as possible. 
43 | 44 | See below if you want to contribute code for your idea as well. 45 | 46 | ### Report a bug 🐛 47 | 48 | Using `MetaCompore` and discovered a bug? That's annoying! Don't let others have the same experience and report it as an [issue on GitHub][new_issue] so we can fix it. A good bug report makes it easier for us to do so, so please include: 49 | 50 | * Your operating system name and version (e.g. Mac OS 10.13.6). 51 | * Any details about your local setup that might be helpful in troubleshooting. 52 | * Detailed steps to reproduce the bug. 53 | 54 | ### Improve the documentation 📖 55 | 56 | Noticed a typo on the website? Think a function could use a better example? Good documentation makes all the difference, so your help to improve it is very welcome! 57 | 58 | 1. Fork [this repo][repo] and clone it to your computer. To learn more about this process, see [this guide](https://guides.github.com/activities/forking/). 59 | 2. Edit the README.md file and submit a pull request. We will review your changes and include the fix in the next release. 60 | 61 | ### Contribute code 📝 62 | 63 | Care to fix bugs or implement new functionality for `MetaCompore`? Awesome! 👏 Have a look at the [issue list][issues] and leave a comment on the things you want to work on. See also the development guidelines below. 64 | 65 | ## Development guidelines 66 | 67 | We try to follow the [GitHub flow](https://guides.github.com/introduction/flow/) for development and the [PEP 8](https://www.python.org/dev/peps/pep-0008/) style Guide for Python Code. 68 | 69 | 1. Fork [this repo][repo] and clone it to your computer. To learn more about this process, see [this guide](https://guides.github.com/activities/forking/). 70 | 71 | 2. If you have forked and cloned the project before and it has been a while since you worked on it, [pull changes from the original repo](https://help.github.com/articles/merging-an-upstream-repository-into-your-fork/) to your clone by using `git pull upstream master`. 
72 | 73 | 3. Make your changes and test the modified code. 74 | 75 | 4. Commit and push your changes. 76 | 77 | 5. Submit a [pull request](https://guides.github.com/activities/forking/#making-a-pull-request). 78 | 79 | 80 | --- 81 | 82 | This file was adapted from a template created by [peterdesmet](https://gist.github.com/peterdesmet/e90a1b0dc17af6c12daf6e8b2f044e7c). 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020, Adrien Leger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MetaCompore/Snakefile: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | from os.path import join 5 | from snakemake.logging import logger 6 | from snakemake.utils import min_version 7 | from glob import glob 8 | min_version("5.30.0") 9 | 10 | include: "workflow/rules/common.smk" 11 | 12 | ##### load config and sample sheets ##### 13 | 14 | logger.info("Loading and checking configuration file") 15 | config = config_load_validate(configfile="config.yaml", schema="workflow/schemas/config.schema.yaml") 16 | 17 | logger.info("Loading and checking sample file") 18 | samples_df = samples_load_validate(samplefile="samples.tsv", schema="workflow/schemas/samples.schema.yaml") 19 | replicates_list=list(samples_df["replicate"].unique()) 20 | condition_list=list(samples_df["condition"].unique()) 21 | logger.info(f"replicates found: {replicates_list}") 22 | logger.info(f"condition found: {condition_list}") 23 | 24 | ##### Define all output files depending on config file ##### 25 | 26 | logger.info("Defining target files") 27 | target_files=[] 28 | 29 | # Add input target files 30 | target_files.extend(expand(join("results", "alignment", "alignmemt_postfilter", "{cond}_{rep}.bam"), cond=condition_list, rep=replicates_list)) 31 | target_files.extend(expand(join("results", "alignment", "alignmemt_merge", "{cond}.bam"), cond=condition_list)) 32 | target_files.extend(expand(join("results", "resquiggling", "f5c_eventalign", "{cond}_{rep}_data.tsv"), cond=condition_list, rep=replicates_list)) 33 | target_files.extend(expand(join("results", "resquiggling", "f5c_eventalign", "{cond}_{rep}_summary.tsv"), cond=condition_list, rep=replicates_list)) 34 | 35 | if config.get("quality_control", None): 36 | logger.info("Defining target files for `quality_control` rules") 37 | target_files.extend(expand(join("results", 
"quality_control", "pycoQC", "pycoQC_{cond}_{rep}.json"), cond=condition_list, rep=replicates_list)) 38 | target_files.extend(expand(join("results", "quality_control", "pycoQC", "pycoQC_{cond}_{rep}.html"), cond=condition_list, rep=replicates_list)) 39 | 40 | if config.get("nanocompore", None): 41 | logger.info("Defining target files for `nanocompore` rules") 42 | target_files.append(join("results", "final", "nanocompore_results_GMM_context_0.tsv")) 43 | target_files.append(join("results", "final", "nanocompore_results_GMM_context_2.tsv")) 44 | target_files.append(join("results", "final", "nanocompore_results_KS_dwell_context_0.tsv")) 45 | target_files.append(join("results", "final", "nanocompore_results_KS_dwell_context_2.tsv")) 46 | target_files.append(join("results", "final", "nanocompore_results_KS_intensity_context_0.tsv")) 47 | target_files.append(join("results", "final", "nanocompore_results_KS_intensity_context_2.tsv")) 48 | 49 | if config.get("tombo", None): 50 | logger.info("Defining target files for `tombo` rules") 51 | target_files.append(join("results", "final", "tombo_results.tsv")) 52 | 53 | if config.get("differr", None): 54 | logger.info("Defining target files for `differr` rules") 55 | target_files.append(join("results", "final", "differr_results.tsv")) 56 | 57 | if config.get("eligos2", None): 58 | logger.info("Defining target files for `eligos2` rules") 59 | target_files.append(join("results", "final", "eligos2_results.tsv")) 60 | 61 | if config.get("mines", None): 62 | logger.info("Defining target files for `mines` rules") 63 | target_files.append(join("results", "final", "mines_results.tsv")) 64 | 65 | # if config.get("xpore", None): 66 | # target_files.append("xpore_out_files") 67 | 68 | if config.get("epinano", None): 69 | target_files.append(join("results", "final", "epinano_results.tsv")) 70 | 71 | ##### Set main rule ##### 72 | 73 | rule all: 74 | input: target_files 75 | 76 | ##### Snakemake Include ##### 77 | 78 | include: 
"workflow/rules/input.smk" 79 | include: "workflow/rules/basecalling.smk" 80 | include: "workflow/rules/alignment.smk" 81 | include: "workflow/rules/resquiggling.smk" 82 | include: "workflow/rules/quality_control.smk" 83 | include: "workflow/rules/nanocompore.smk" 84 | include: "workflow/rules/tombo.smk" 85 | include: "workflow/rules/differr.smk" 86 | include: "workflow/rules/eligos2.smk" 87 | include: "workflow/rules/mines.smk" 88 | # include: "workflow/rules/xpore.smk" 89 | include: "workflow/rules/epinano.smk" 90 | -------------------------------------------------------------------------------- /MetaCompore/environment.yaml: -------------------------------------------------------------------------------- 1 | name: MetaCompore 2 | 3 | channels: 4 | - bioconda 5 | - conda-forge 6 | 7 | dependencies: 8 | - python=3.8 9 | - snakemake>=5.30.1 10 | -------------------------------------------------------------------------------- /MetaCompore/example_Run_snakemake.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | snakemake $* --use-singularity -j 4 --keep-going 5 | -------------------------------------------------------------------------------- /MetaCompore/example_config.yaml: -------------------------------------------------------------------------------- 1 | 2 | ##### Source files ##### 3 | 4 | # Path to an ENSEMBL FASTA reference transcriptome file/URL to be used for read mapping (local, FTP or HTTP) 5 | 6 | transcriptome_ref: "/home/tleonardi/programming/bioinformatics/MetaCompore/fast5_subset/reference_transcriptome_selected.fa" 7 | 8 | ##### Conditional execution of pipeline ##### 9 | 10 | gpu_acceleration: False 11 | quality_control: False 12 | nanocompore: True 13 | tombo: False 14 | differr: True 15 | eligos2: True 16 | mines: False 17 | xpore: False 18 | epinano: True 19 | 20 | ##### Individual rules configuration ##### 21 | 22 | # Input data parameters 23 | 24 | 
get_transcriptome: 25 | threads: 1 26 | mem_mb: 1000 27 | opt: "" 28 | 29 | # Basecalling parameters 30 | 31 | ont_guppy: 32 | threads: 4 33 | mem_mb: 6000 34 | opt: "-c rna_r9.4.1_70bps_hac.cfg --records_per_fastq 0 --recursive --disable_pings --calib_detect --num_callers 4 --cpu_threads_per_caller 1" 35 | # example GPU options if using gpu_basecalling instead 36 | #opt: "-c rna_r9.4.1_70bps_hac.cfg --records_per_fastq 0 --recursive --disable_pings --calib_detect --num_callers 8 --gpu_runners_per_device 1 --device 'auto'" 37 | 38 | merge_fastq: 39 | threads: 1 40 | mem_mb: 1000 41 | opt: "--remove_duplicates --min_len 100 --min_qual 7" 42 | 43 | # Alignment parameters 44 | 45 | minimap2_index: 46 | threads: 1 47 | mem_mb: 1000 48 | opt: "" 49 | 50 | minimap2_align: 51 | threads: 4 52 | mem_mb: 1000 53 | opt: "-a -L -x map-ont" 54 | 55 | alignmemt_prefilter: 56 | threads: 1 57 | mem_mb: 1000 58 | opt: "--skip_unmapped --skip_secondary --skip_supplementary --index_reads --orientation '+' --min_read_len 100 --min_align_len 100 --min_mapq 10 --min_freq_identity 0.8" 59 | 60 | min_ref_coverage: 61 | threads: 1 62 | mem_mb: 1000 63 | opt: 64 | min_cov: 30 65 | 66 | alignmemt_postfilter: 67 | threads: 1 68 | mem_mb: 1000 69 | opt: "--index_reads" 70 | 71 | alignmemt_merge: 72 | threads: 4 73 | mem_mb: 1000 74 | opt: "" 75 | 76 | # QC parameters 77 | 78 | pycoQC: 79 | threads: 1 80 | mem_mb: 1000 81 | opt: "--skip_coverage_plot --filter_calibration --filter_duplicated --min_pass_len 100 --min_pass_qual 7" 82 | 83 | # f5c parameters 84 | 85 | f5c_index: 86 | threads: 4 87 | mem_mb: 1000 88 | opt: "--iop 4" 89 | 90 | f5c_eventalign: 91 | threads: 4 92 | mem_mb: 1000 93 | opt: "-x laptop-low --rna --samples --signal-index --print-read-names --scale-events" 94 | 95 | # Nanocompore parameters 96 | 97 | nanocompore_eventalign_collapse: 98 | threads: 4 99 | mem_mb: 1000 100 | opt: "" 101 | 102 | nanocompore_sampcomp: 103 | threads: 4 104 | mem_mb: 1000 105 | opt: 
"--max_invalid_kmers_freq 0.2 --min_coverage 30 --downsample_high_coverage 5000 --min_ref_length 100 --comparison_methods GMM,KS --sequence_context 2 --sequence_context_weights harmonic --pvalue_thr 0.01 --logit" 106 | 107 | nanocompore_postprocess: 108 | threads: 1 109 | mem_mb: 1000 110 | opt: 111 | p_val_lim: 0.01 112 | quantile_lim: 0.5 113 | min_distance: 9 114 | 115 | # tombo parameters 116 | 117 | tombo_preprocess: 118 | threads: 4 119 | mem_mb: 1000 120 | opt: "" 121 | 122 | tombo_level_sample_compare: 123 | threads: 4 124 | mem_mb: 1000 125 | opt: "--minimum-test-reads 30 --fishers-method-context 2 --statistic-type ks --store-p-value" 126 | 127 | tombo_postprocess: 128 | threads: 1 129 | mem_mb: 1000 130 | opt: 131 | min_cov: 30 132 | p_val_lim: 0.01 133 | quantile_lim: 0.5 134 | min_distance: 9 135 | 136 | # differr parameters 137 | 138 | differr_compare: 139 | threads: 4 140 | mem_mb: 1000 141 | opt: "--normalise --max-depth 5000 --fdr-threshold 0.01 --median-expr-threshold 30 --min-expr-threshold 30" 142 | 143 | differr_postprocess: 144 | threads: 1 145 | mem_mb: 1000 146 | opt: "" 147 | 148 | # eligos2 parameters 149 | 150 | eligos2_fasta_to_bed: 151 | threads: 1 152 | mem_mb: 1000 153 | opt: "" 154 | 155 | eligos2_pair_diff_mod: 156 | threads: 4 157 | mem_mb: 1000 158 | opt: "--oddR 0 --esb 0 --min_depth 30 --pval 1 --adjPval 1" 159 | 160 | eligos2_postprocess: 161 | threads: 1 162 | mem_mb: 1000 163 | opt: 164 | min_oddR: 1.2 165 | min_esb: 0 166 | min_cov: 30 167 | max_adj_pval: 0.01 168 | discard_homopolymers: True 169 | ref_base: "A" 170 | 171 | # Mines parameters 172 | 173 | tombo_de_novo: 174 | threads: 4 175 | mem_mb: 1000 176 | opt: "--rna --minimum-test-reads 30 --fishers-method-context 2" 177 | 178 | tombo_de_novo_text_output: 179 | threads: 1 180 | mem_mb: 1000 181 | opt: "" 182 | 183 | mines_wig2bed: 184 | threads: 1 185 | mem_mb: 1000 186 | opt: "" 187 | 188 | mines_cdna: 189 | threads: 1 190 | mem_mb: 1000 191 | opt: "" 192 | 193 | 
mines_postprocess: 194 | threads: 1 195 | mem_mb: 1000 196 | opt: 197 | min_cov: 30 198 | 199 | 200 | # Epinano parameters 201 | # Could not find value `threads` for rule `ont_guppy_epinano` in config file. Could not find value `opt` for rule `ont_guppy_epinano` in config file 202 | 203 | ont_guppy_epinano: 204 | threads: 4 205 | mem_mb: 6000 206 | opt: "-c rna_r9.4.1_70bps_hac.cfg --records_per_fastq 0 --recursive --disable_pings --calib_detect --num_callers 4 --cpu_threads_per_caller 1" 207 | # example GPU options if using gpu_basecalling instead 208 | #opt: "-c rna_r9.4.1_70bps_hac.cfg --records_per_fastq 0 --recursive --disable_pings --calib_detect --num_callers 8 --gpu_runners_per_device 1 --device 'auto'" 209 | 210 | merge_fastq_epinano: 211 | threads: 1 212 | mem_mb: 1000 213 | opt: "--remove_duplicates --min_len 100 --min_qual 7" 214 | 215 | minimap2_align_epinano: 216 | threads: 4 217 | mem_mb: 1000 218 | opt: "-a --MD -x map-ont" 219 | 220 | alignmemt_prefilter_epinano: 221 | threads: 1 222 | mem_mb: 1000 223 | opt: "--skip_unmapped --skip_secondary --skip_supplementary --index_reads --orientation '+' --min_read_len 100 --min_align_len 100 --min_mapq 10 --min_freq_identity 0.8" 224 | 225 | min_ref_coverage_epinano: 226 | threads: 1 227 | mem_mb: 1000 228 | opt: 229 | min_cov: 30 230 | 231 | alignmemt_postfilter_epinano: 232 | threads: 1 233 | mem_mb: 1000 234 | opt: "--index_reads" 235 | 236 | epinano_postprocess: 237 | threads: 1 238 | mem_mb: 1000 239 | 240 | alignmemt_merge_epinano: 241 | threads: 4 242 | mem_mb: 1000 243 | opt: "" 244 | 245 | epinano_delta_variants: 246 | threads: 1 247 | mem_mb: 8000 248 | opt: 249 | min_cov: 30 250 | 251 | generate_transcriptome_picard_index: 252 | mem_mb: 1000 253 | 254 | epinano_variants: 255 | threads: 1 256 | mem_mb: 8000 257 | 258 | epinano_filter_rrach_variants: 259 | threads: 1 260 | mem_mb: 1000 261 | 262 | epinano_gather_variants: 263 | threads: 1 264 | mem_mb: 1000 265 | 266 | epinano_predict: 267 | threads: 
1 268 | mem_mb: 8000 269 | 270 | epinano_delta_predict: 271 | threads: 1 272 | mem_mb: 8000 273 | -------------------------------------------------------------------------------- /MetaCompore/example_lsf.yaml: -------------------------------------------------------------------------------- 1 | __default__: 2 | - "-P project2" 3 | - "-W 1:05" 4 | 5 | foo: 6 | - "-P gpu" 7 | - "-gpu 'gpu resources'" 8 | -------------------------------------------------------------------------------- /MetaCompore/example_samples.tsv: -------------------------------------------------------------------------------- 1 | sample_id condition replicate fast5_dir 2 | s0 control 0 /home/aleg/Programming/Packages/MetaCompore/MetaCompore/data/yeast/fast5/KO_rep0 3 | s1 control 1 /home/aleg/Programming/Packages/MetaCompore/MetaCompore/data/yeast/fast5/KO_rep1 4 | s2 control 2 /home/aleg/Programming/Packages/MetaCompore/MetaCompore/data/yeast/fast5/KO_rep2 5 | s3 test 0 /home/aleg/Programming/Packages/MetaCompore/MetaCompore/data/yeast/fast5/WT_rep0 6 | s4 test 1 /home/aleg/Programming/Packages/MetaCompore/MetaCompore/data/yeast/fast5/WT_rep1 7 | s5 test 2 /home/aleg/Programming/Packages/MetaCompore/MetaCompore/data/yeast/fast5/WT_rep2 8 | -------------------------------------------------------------------------------- /MetaCompore/resources/f5c/r9.4_70bps.u_to_t_rna.5mer.template.model: -------------------------------------------------------------------------------- 1 | #ont_model_name r9.4_180mv_70bps_5mer_RNA 2 | #kit r9.4_70bps 3 | #strand template 4 | #k 5 5 | #alphabet u_to_t_rna 6 | #original_file r9.4_180mv_70bps_5mer_RNA/template_median69pA.model 7 | kmer level_mean level_stdv sd_mean sd_stdv ig_lambda weight 8 | AAAAA 108.901413 2.676522 2.054767 0.857186 11.806934 7172.158009 9 | AAAAC 107.754232 2.676522 2.436309 1.106700 11.806934 4473.874031 10 | AAAAG 101.724425 2.676522 2.279269 1.001440 11.806934 5015.001492 11 | AAAAT 112.768194 2.676522 2.051496 0.855141 11.806934 6952.903142 
12 | AAACA 99.384679 3.449749 2.918528 1.314774 14.381008 3759.220177 13 | AAACC 99.995527 3.449749 3.486555 1.716723 14.381008 2638.806645 14 | AAACG 101.014242 3.449749 3.527192 1.746823 14.381008 2322.149962 15 | AAACT 106.914144 3.449749 2.555187 1.077059 14.381008 3832.431110 16 | AAAGA 110.541286 4.062744 3.287399 1.244748 22.929501 6374.751009 17 | AAAGC 107.693450 4.062744 3.651055 1.456902 22.929501 3289.552194 18 | AAAGG 108.287250 4.062744 3.067997 1.122238 22.929501 3042.848084 19 | AAAGT 108.731506 4.062744 3.030668 1.101819 22.929501 3607.702848 20 | AAATA 114.105726 3.108588 2.536485 1.186626 11.589620 5070.435813 21 | AAATC 112.204348 3.108588 2.533213 1.184331 11.589620 4566.778790 22 | AAATG 110.666258 3.108588 3.445155 1.878358 11.589620 3682.789860 23 | AAATT 115.980739 3.108588 2.361944 1.066276 11.589620 7515.685740 24 | AACAA 87.823777 3.181436 2.196993 1.081745 9.062258 4983.003528 25 | AACAC 89.760969 3.181436 2.252439 1.122952 9.062258 2238.684419 26 | AACAG 86.535661 3.181436 2.417050 1.248275 9.062258 3370.886287 27 | AACAT 87.876310 3.181436 2.042451 0.969637 9.062258 3199.653225 28 | AACCA 82.923787 2.812500 1.798393 0.873333 7.625947 3770.869901 29 | AACCC 84.523756 2.812500 2.050856 1.063545 7.625947 1680.785464 30 | AACCG 84.372558 2.812500 1.925219 0.967327 7.625947 2268.563617 31 | AACCT 84.272303 2.812500 2.097420 1.099971 7.625947 2109.398496 32 | AACGA 79.826259 3.180457 3.179832 1.573930 12.979021 2904.573231 33 | AACGC 80.646898 3.180457 3.632438 1.921659 12.979021 2694.287411 34 | AACGG 83.667276 3.180457 3.008674 1.448577 12.979021 2504.938710 35 | AACGT 80.070168 3.180457 2.873425 1.352007 12.979021 2990.068040 36 | AACTA 92.912342 2.731261 2.035029 0.930968 9.723928 2274.228880 37 | AACTC 92.393785 2.731261 1.979659 0.893233 9.723928 2279.971909 38 | AACTG 92.985289 2.731261 2.348630 1.154253 9.723928 3210.947964 39 | AACTT 92.871956 2.731261 2.060667 0.948617 9.723928 3300.475864 40 | AAGAA 122.139294 5.872282 4.646323 
2.260901 19.622985 6427.971898 41 | AAGAC 121.025135 5.872282 4.088067 1.865926 19.622985 2262.827920 42 | AAGAG 122.095254 5.872282 4.833334 2.398765 19.622985 3216.802402 43 | AAGAT 124.174740 5.872282 4.228976 1.963226 19.622985 4059.809947 44 | AAGCA 107.336731 3.017274 3.799058 1.944181 14.506225 2275.677482 45 | AAGCC 102.836148 3.017274 4.303556 2.344035 14.506225 2195.969480 46 | AAGCG 109.951791 3.017274 4.851337 2.805530 14.506225 2005.986981 47 | AAGCT 110.976336 3.017274 4.051914 2.141476 14.506225 2836.890930 48 | AAGGA 114.014784 7.840309 4.210749 1.965004 19.335318 2580.921570 49 | AAGGC 115.200439 7.840309 6.759134 3.996326 19.335318 2824.320967 50 | AAGGG 113.123905 7.840309 4.576518 2.226523 19.335318 1404.535706 51 | AAGGT 115.608986 7.840309 5.179045 2.680395 19.335318 2773.141767 52 | AAGTA 119.312160 4.029736 4.352348 2.337984 15.083004 2069.944331 53 | AAGTC 114.941808 4.029736 3.858347 1.951452 15.083004 1760.638617 54 | AAGTG 117.293827 4.029736 5.571029 3.385784 15.083004 2254.945602 55 | AAGTT 119.408277 4.029736 3.908872 1.989909 15.083004 3398.211154 56 | AATAA 97.174409 6.278645 4.220107 2.398234 13.067338 5087.127179 57 | AATAC 102.575110 6.278645 4.958062 3.054038 13.067338 2984.107744 58 | AATAG 99.736573 6.278645 3.845519 2.086118 13.067338 2569.924532 59 | AATAT 99.116744 6.278645 4.336835 2.498422 13.067338 4981.945748 60 | AATCA 100.237818 5.702234 3.594511 1.701398 16.043813 3791.050331 61 | AATCC 103.249725 5.702234 3.923985 1.940605 16.043813 2181.115659 62 | AATCG 101.466911 5.702234 3.844583 1.882002 16.043813 1947.190143 63 | AATCT 101.610711 5.702234 4.199953 2.148883 16.043813 2430.349917 64 | AATGA 85.255182 3.868907 3.939620 2.040860 14.680350 4633.123712 65 | AATGC 81.803851 3.868907 4.586615 2.563718 14.680350 3083.757907 66 | AATGG 89.309222 3.868907 4.058305 2.133776 14.680350 3929.131203 67 | AATGT 82.193226 3.868907 3.740646 1.888217 14.680350 2920.680862 68 | AATTA 101.015119 5.532156 3.023340 1.339784 15.395424 
4172.581309 69 | AATTC 101.901081 5.532156 3.410917 1.605502 15.395424 3346.120533 70 | AATTG 100.780633 5.532156 3.759123 1.857522 15.395424 4024.403188 71 | AATTT 100.946682 5.532156 3.988808 2.030340 15.395424 5066.834634 72 | ACAAA 82.446931 3.018476 2.208017 0.988636 11.013742 4419.361805 73 | ACAAC 80.964607 3.018476 1.641824 0.633902 11.013742 3408.807117 74 | ACAAG 78.464350 3.018476 2.604962 1.266878 11.013742 3117.553657 75 | ACAAT 84.126436 3.018476 2.014904 0.861815 11.013742 3232.846004 76 | ACACA 73.593076 1.875816 1.590623 0.698081 8.258278 1443.338308 77 | ACACC 72.157345 1.875816 1.495301 0.636280 8.258278 2001.401835 78 | ACACG 74.894231 1.875816 1.749766 0.805425 8.258278 1231.164736 79 | ACACT 77.763958 1.875816 1.670365 0.751229 8.258278 1426.200695 80 | ACAGA 96.555442 5.093152 6.077673 3.429144 19.091520 1655.632694 81 | ACAGC 87.785962 5.093152 2.970873 1.171941 19.091520 2458.124399 82 | ACAGG 84.200224 5.093152 4.511212 2.192904 19.091520 1788.050780 83 | ACAGT 91.620597 5.093152 4.107931 1.905524 19.091520 1954.909888 84 | ACATA 80.762856 2.134613 1.628574 0.654188 10.092947 1927.721362 85 | ACATC 78.737631 2.134613 1.561754 0.614342 10.092947 2715.892573 86 | ACATG 79.175014 2.134613 1.757069 0.733119 10.092947 2041.268118 87 | ACATT 84.224654 2.134613 1.628088 0.653895 10.092947 3106.231409 88 | ACCAA 74.583242 2.107591 1.434615 0.604302 8.085333 3598.713202 89 | ACCAC 73.952994 2.107591 1.262082 0.498634 8.085333 2968.943305 90 | ACCAG 72.329156 2.107591 1.959084 0.964341 8.085333 2895.288534 91 | ACCAT 74.741150 2.107591 1.339758 0.545369 8.085333 3241.874892 92 | ACCCA 66.298610 2.070961 1.385773 0.651083 6.277753 2216.348691 93 | ACCCC 66.655278 2.070961 1.521562 0.749087 6.277753 827.512548 94 | ACCCG 67.209854 2.070961 1.541065 0.763536 6.277753 1457.747166 95 | ACCCT 68.103653 2.070961 1.405645 0.665138 6.277753 1318.816342 96 | ACCGA 82.817398 2.778549 2.481682 1.111560 12.370080 2028.204437 97 | ACCGC 77.156491 2.778549 
1.570390 0.559532 12.370080 2551.864722 98 | ACCGG 76.543903 2.778549 2.351801 1.025450 12.370080 2200.866218 99 | ACCGT 77.731098 2.778549 1.751909 0.659297 12.370080 2389.669545 100 | ACCTA 69.022895 2.349865 1.627642 0.751344 7.638337 1141.507770 101 | ACCTC 69.473323 2.349865 1.360901 0.574434 7.638337 1291.986305 102 | ACCTG 70.748003 2.349865 1.881070 0.933487 7.638337 2310.901042 103 | ACCTT 72.214658 2.349865 1.287195 0.528405 7.638337 2146.065629 104 | ACGAA 85.738453 3.990469 3.507285 1.504554 19.058895 2666.540616 105 | ACGAC 85.837941 3.990469 3.535814 1.522949 19.058895 1580.362067 106 | ACGAG 84.299668 3.990469 3.621263 1.578488 19.058895 952.855795 107 | ACGAT 87.507643 3.990469 3.574306 1.547885 19.058895 2766.816676 108 | ACGCA 76.327788 2.600796 2.590441 1.150588 13.130508 1913.801162 109 | ACGCC 74.767402 2.600796 2.519975 1.103961 13.130508 2749.582166 110 | ACGCG 77.116883 2.600796 2.694454 1.220577 13.130508 1669.730407 111 | ACGCT 79.972499 2.600796 2.514852 1.100596 13.130508 2408.893910 112 | ACGGA 96.558490 4.981474 7.993538 4.629010 23.836415 1501.077953 113 | ACGGC 85.580326 4.981474 3.525998 1.356134 23.836415 2061.617145 114 | ACGGG 83.393928 4.981474 4.812821 2.162614 23.836415 1240.424890 115 | ACGGT 89.182212 4.981474 4.312685 1.834430 23.836415 2838.303454 116 | ACGTA 101.495469 3.049711 6.239933 4.440870 12.319804 540.934039 117 | ACGTC 78.177926 3.049711 2.427039 1.077242 12.319804 1559.841291 118 | ACGTG 78.194744 3.049711 2.707667 1.269378 12.319804 1113.259862 119 | ACGTT 82.520253 3.049711 2.447660 1.091000 12.319804 2531.547379 120 | ACTAA 86.712697 2.631718 1.600059 0.698105 8.405566 1686.999037 121 | ACTAC 86.582543 2.631718 1.898515 0.902272 8.405566 1592.548858 122 | ACTAG 86.098935 2.631718 1.962211 0.948058 8.405566 708.338833 123 | ACTAT 86.600747 2.631718 1.665118 0.741112 8.405566 1843.451912 124 | ACTCA 81.845266 2.848865 1.676833 0.759168 8.180752 1692.616415 125 | ACTCC 80.737367 2.848865 1.844719 0.875989 
8.180752 1622.624483 126 | ACTCG 80.877383 2.848865 1.782376 0.831960 8.180752 955.863970 127 | ACTCT 83.723176 2.848865 1.661568 0.748826 8.180752 1919.298850 128 | ACTGA 93.408051 2.850220 2.556345 1.106446 13.645766 2586.780170 129 | ACTGC 86.110953 2.850220 1.848342 0.680260 13.645766 2401.052459 130 | ACTGG 87.124537 2.850220 2.392397 1.001731 13.645766 3243.758967 131 | ACTGT 87.174256 2.850220 1.905537 0.712077 13.645766 2219.219549 132 | ACTTA 77.863849 2.416143 1.467562 0.658184 7.296159 1827.009147 133 | ACTTC 80.337425 2.416143 1.569026 0.727609 7.296159 2496.510727 134 | ACTTG 80.104596 2.416143 1.643795 0.780233 7.296159 2417.092937 135 | ACTTT 82.378469 2.416143 1.475140 0.663289 7.296159 3635.781304 136 | AGAAA 128.133534 5.559623 5.669243 3.288783 16.846307 7149.445562 137 | AGAAC 128.772325 5.559623 6.057161 3.632046 16.846307 4323.693031 138 | AGAAG 123.663906 5.559623 5.143108 2.841752 16.846307 4690.832886 139 | AGAAT 129.862932 5.559623 5.214372 2.901019 16.846307 5331.183458 140 | AGACA 125.561516 4.794395 4.742963 2.624025 15.495778 2424.628549 141 | AGACC 125.603209 4.794395 4.355722 2.309317 15.495778 2159.022211 142 | AGACG 127.315213 4.794395 4.578080 2.488390 15.495778 2409.785188 143 | AGACT 129.807220 4.794395 4.206508 2.191674 15.495778 2198.099374 144 | AGAGA 127.709315 5.691493 4.706719 2.476447 17.001874 3317.168468 145 | AGAGC 128.622891 5.691493 6.269092 3.806786 17.001874 2361.472460 146 | AGAGG 123.332959 5.691493 5.222899 2.894803 17.001874 2150.228121 147 | AGAGT 128.245512 5.691493 4.698349 2.469844 17.001874 2302.463483 148 | AGATA 134.100055 5.101816 4.964251 2.396129 21.307914 4166.571989 149 | AGATC 134.081251 5.101816 4.800366 2.278458 21.307914 3028.868051 150 | AGATG 133.603178 5.101816 5.647704 2.907619 21.307914 4210.410741 151 | AGATT 136.888917 5.101816 4.469835 2.047231 21.307914 5724.328429 152 | AGCAA 115.200570 3.856268 3.830328 1.871677 16.041533 3548.836742 153 | AGCAC 117.191419 3.856268 3.597894 1.703922 
16.041533 1784.081324 154 | AGCAG 112.165990 3.856268 3.477457 1.619085 16.041533 2350.418925 155 | AGCAT 114.889148 3.856268 3.756676 1.817953 16.041533 2545.528480 156 | AGCCA 109.316961 3.291378 3.461107 1.598232 16.231747 2756.534416 157 | AGCCC 111.179412 3.291378 3.334267 1.511186 16.231747 1177.922003 158 | AGCCG 109.945677 3.291378 3.388161 1.547973 16.231747 1657.737071 159 | AGCCT 111.210293 3.291378 3.505820 1.629303 16.231747 1505.441017 160 | AGCGA 105.688034 8.394211 6.624417 4.771124 12.770314 2200.897043 161 | AGCGC 110.343163 8.394211 7.344190 5.569486 12.770314 2458.689303 162 | AGCGG 106.831009 8.394211 5.137697 3.258757 12.770314 2357.741608 163 | AGCGT 107.964723 8.394211 6.767708 4.926762 12.770314 2068.184062 164 | AGCTA 117.176759 3.551179 3.262278 1.358191 18.820938 1696.308855 165 | AGCTC 118.063757 3.551179 3.290593 1.375912 18.820938 1495.894460 166 | AGCTG 117.440806 3.551179 3.390190 1.438850 18.820938 2559.815172 167 | AGCTT 118.185419 3.551179 3.531919 1.530014 18.820938 2314.261392 168 | AGGAA 117.457668 3.173751 4.534144 2.606877 13.716547 3505.806577 169 | AGGAC 115.917559 3.173751 5.038990 3.054168 13.716547 1169.935063 170 | AGGAG 116.685364 3.173751 4.962235 2.984651 13.716547 1240.470889 171 | AGGAT 120.367899 3.173751 4.456662 2.540342 13.716547 2538.033241 172 | AGGCA 109.736888 5.310878 6.833086 4.477196 15.916164 2021.819827 173 | AGGCC 109.162065 5.310878 7.570982 5.221666 15.916164 1429.426835 174 | AGGCG 112.237084 5.310878 7.758121 5.416461 15.916164 2157.241937 175 | AGGCT 120.311348 5.310878 7.448327 5.095290 15.916164 2036.245562 176 | AGGGA 115.882913 4.047423 4.332791 1.779497 25.686727 1725.806175 177 | AGGGC 116.404557 4.047423 5.680826 2.671547 25.686727 1771.547266 178 | AGGGG 113.610452 4.047423 5.127082 2.290610 25.686727 878.287437 179 | AGGGT 117.277569 4.047423 4.823616 2.090281 25.686727 1734.112650 180 | AGGTA 119.513047 3.368228 5.227273 2.735479 19.087935 2066.592916 181 | AGGTC 116.546344 3.368228 
5.851210 3.239582 19.087935 1281.643736 182 | AGGTG 117.246866 3.368228 6.486450 3.781213 19.087935 2060.249324 183 | AGGTT 121.108797 3.368228 5.481238 2.937234 19.087935 2974.848274 184 | AGTAA 123.302649 8.555862 7.271231 4.869230 16.214488 2351.785849 185 | AGTAC 128.298724 8.555862 6.942624 4.542907 16.214488 1565.650569 186 | AGTAG 122.886079 8.555862 6.547061 4.160236 16.214488 1386.985611 187 | AGTAT 124.927423 8.555862 6.982518 4.582120 16.214488 1955.093256 188 | AGTCA 122.594424 6.071202 5.556860 2.901007 20.388755 1810.006180 189 | AGTCC 125.199218 6.071202 5.666827 2.987546 20.388755 906.123165 190 | AGTCG 122.920112 6.071202 5.566716 2.908729 20.388755 1555.461272 191 | AGTCT 125.263087 6.071202 5.611059 2.943553 20.388755 1349.090564 192 | AGTGA 100.006169 9.689941 7.090350 5.631627 11.239198 2831.452854 193 | AGTGC 110.680015 9.689941 10.522708 10.181778 11.239198 1475.298746 194 | AGTGG 111.727303 9.689941 7.175992 5.733967 11.239198 1751.221585 195 | AGTGT 105.145547 9.689941 8.495778 7.386472 11.239198 1401.789241 196 | AGTTA 118.770310 7.489815 5.600367 2.945103 20.251096 2052.391915 197 | AGTTC 123.871670 7.489815 5.597530 2.942865 20.251096 2726.889894 198 | AGTTG 121.107442 7.489815 6.245828 3.468653 20.251096 2753.852350 199 | AGTTT 124.343096 7.489815 5.918670 3.199719 20.251096 3609.499150 200 | ATAAA 86.593813 3.040826 2.244916 1.063238 10.007805 5472.147746 201 | ATAAC 85.402731 3.040826 2.085527 0.952038 10.007805 2985.402055 202 | ATAAG 82.488915 3.040826 2.680390 1.387163 10.007805 2580.453941 203 | ATAAT 89.363951 3.040826 2.372988 1.155511 10.007805 4580.176725 204 | ATACA 76.777933 2.102993 1.868619 0.876137 8.499982 1905.739101 205 | ATACC 74.688134 2.102993 1.874514 0.880287 8.499982 2094.051285 206 | ATACG 77.766007 2.102993 2.021492 0.985823 8.499982 1639.353153 207 | ATACT 81.803551 2.102993 2.049941 1.006707 8.499982 1679.003046 208 | ATAGA 103.910533 3.781763 4.462197 2.262303 17.359805 1835.853629 209 | ATAGC 91.224691 
3.781763 2.519081 0.959601 17.359805 2191.129829 210 | ATAGG 85.792962 3.781763 3.496569 1.569245 17.359805 1086.606899 211 | ATAGT 93.786839 3.781763 3.022571 1.261224 17.359805 2076.299243 212 | ATATA 86.666350 2.632201 2.314854 1.116791 9.945506 3403.428148 213 | ATATC 83.154921 2.632201 2.108995 0.971181 9.945506 3911.492644 214 | ATATG 83.953361 2.632201 2.440820 1.209178 9.945506 3152.151637 215 | ATATT 90.486693 2.632201 2.329824 1.127642 9.945506 5047.319281 216 | ATCAA 78.880781 2.300504 1.499345 0.678924 7.312444 5099.988685 217 | ATCAC 78.572102 2.300504 1.499039 0.678716 7.312444 2497.341057 218 | ATCAG 76.739343 2.300504 2.074739 1.105132 7.312444 3368.508140 219 | ATCAT 79.199937 2.300504 1.394153 0.608744 7.312444 4208.341183 220 | ATCCA 70.227353 2.298440 1.602266 0.776891 6.815280 2989.390405 221 | ATCCC 70.317427 2.298440 1.807000 0.930455 6.815280 1627.083730 222 | ATCCG 71.191272 2.298440 1.732068 0.873184 6.815280 1959.123157 223 | ATCCT 73.537614 2.298440 1.688241 0.840252 6.815280 1987.411459 224 | ATCGA 84.425297 2.302527 2.224017 0.974107 11.593152 3228.870864 225 | ATCGC 79.589757 2.302527 1.734616 0.670972 11.593152 3200.619923 226 | ATCGG 79.885390 2.302527 2.066258 0.872320 11.593152 2470.493087 227 | ATCGT 80.616314 2.302527 1.730260 0.668446 11.593152 2529.484301 228 | ATCTA 76.666444 2.065374 1.654564 0.760266 7.836445 1989.529153 229 | ATCTC 76.559406 2.065374 1.520270 0.669610 7.836445 2160.576700 230 | ATCTG 77.109611 2.065374 1.836441 0.889008 7.836445 2806.235622 231 | ATCTT 78.865989 2.065374 1.381844 0.580268 7.836445 3596.155021 232 | ATGAA 94.578090 4.487612 3.649479 1.740654 16.042343 5083.575768 233 | ATGAC 94.313336 4.487612 3.819926 1.864011 16.042343 2923.043486 234 | ATGAG 93.097632 4.487612 3.577532 1.689435 16.042343 2102.400628 235 | ATGAT 97.784441 4.487612 3.824477 1.867343 16.042343 4901.412466 236 | ATGCA 84.446488 3.125398 2.971856 1.566726 10.692942 2144.230630 237 | ATGCC 79.659624 3.125398 2.963882 1.560424 
10.692942 2408.623943 238 | ATGCG 83.143635 3.125398 3.199832 1.750419 10.692942 1817.233719 239 | ATGCT 86.666404 3.125398 2.933119 1.536193 10.692942 3081.991468 240 | ATGGA 100.762608 3.994511 4.924368 2.503148 19.058043 3616.397283 241 | ATGGC 94.077436 3.994511 3.254190 1.344699 19.058043 2892.237070 242 | ATGGG 92.935047 3.994511 4.004568 1.835669 19.058043 1573.735453 243 | ATGGT 96.353408 3.994511 3.376926 1.421488 19.058043 4021.840665 244 | ATGTA 88.609282 3.775722 3.184323 1.740685 10.656408 1699.410650 245 | ATGTC 85.928321 3.775722 2.958195 1.558601 10.656408 2048.023833 246 | ATGTG 87.079193 3.775722 3.638068 2.125694 10.656408 1855.637078 247 | ATGTT 89.594494 3.775722 3.058190 1.638291 10.656408 4381.202618 248 | ATTAA 85.297515 2.458906 1.551700 0.685470 7.951444 4256.035210 249 | ATTAC 85.209980 2.458906 1.766635 0.832717 7.951444 3088.773197 250 | ATTAG 84.559987 2.458906 1.931006 0.951595 7.951444 1931.103331 251 | ATTAT 85.437560 2.458906 1.616706 0.728993 7.951444 4905.381348 252 | ATTCA 78.438600 2.072592 1.545630 0.692914 7.690558 3978.738585 253 | ATTCC 78.046180 2.072592 1.850179 0.907490 7.690558 2606.541017 254 | ATTCG 78.538058 2.072592 1.657788 0.769688 7.690558 2211.687388 255 | ATTCT 81.643698 2.072592 1.651643 0.765412 7.690558 2881.582379 256 | ATTGA 90.368377 2.649410 2.369135 1.021018 12.755660 5554.490957 257 | ATTGC 83.878953 2.649410 1.776891 0.663193 12.755660 4769.297413 258 | ATTGG 86.040127 2.649410 2.272754 0.959350 12.755660 4068.910442 259 | ATTGT 85.124486 2.649410 1.858419 0.709355 12.755660 4527.659996 260 | ATTTA 77.576374 1.969599 1.530534 0.701380 7.288230 4042.168311 261 | ATTTC 78.373440 1.969599 1.450973 0.647408 7.288230 4296.722245 262 | ATTTG 79.100081 1.969599 1.633968 0.773667 7.288230 4133.497007 263 | ATTTT 81.164568 1.969599 1.423056 0.628814 7.288230 6257.667127 264 | CAAAA 105.724444 2.676522 2.220147 0.962729 11.806934 4693.125545 265 | CAAAC 104.218946 2.676522 2.330343 1.035288 11.806934 
3285.995187 266 | CAAAG 102.997175 2.676522 2.830245 1.385695 11.806934 3394.456790 267 | CAAAT 110.065655 2.676522 2.654716 1.258805 11.806934 3714.690583 268 | CAACA 91.375475 3.449749 1.946454 0.716096 14.381008 3854.111910 269 | CAACC 89.369315 3.449749 2.165240 0.840165 14.381008 2381.595038 270 | CAACG 91.909217 3.449749 2.569437 1.086081 14.381008 2980.556379 271 | CAACT 96.109537 3.449749 2.151213 0.832013 14.381008 2525.630270 272 | CAAGA 110.387667 4.062744 3.661830 1.463356 22.929501 3614.040904 273 | CAAGC 102.912958 4.062744 4.658198 2.099567 22.929501 1622.994183 274 | CAAGG 106.169711 4.062744 3.809009 1.552461 22.929501 2129.677430 275 | CAAGT 105.116845 4.062744 3.572757 1.410289 22.929501 2251.349651 276 | CAATA 109.258367 3.108588 2.756737 1.344493 11.589620 3827.272736 277 | CAATC 106.205847 3.108588 2.378910 1.077785 11.589620 3016.740246 278 | CAATG 102.741785 3.108588 4.020846 2.368326 11.589620 3158.371720 279 | CAATT 108.911579 3.108588 2.538208 1.187835 11.589620 4068.416823 280 | CACAA 81.773599 3.181436 1.779802 0.788750 9.062258 2308.522833 281 | CACAC 81.035104 3.181436 1.778882 0.788139 9.062258 1399.540196 282 | CACAG 81.433659 3.181436 2.411326 1.243843 9.062258 1472.964001 283 | CACAT 81.245111 3.181436 1.660179 0.710582 9.062258 1742.549171 284 | CACCA 75.529943 2.812500 1.486978 0.656613 7.625947 3446.140578 285 | CACCC 75.114913 2.812500 1.654204 0.770437 7.625947 1463.076519 286 | CACCG 76.380811 2.812500 1.655631 0.771433 7.625947 2396.076833 287 | CACCT 75.565847 2.812500 1.597748 0.731333 7.625947 1906.879568 288 | CACGA 78.732084 3.180457 2.269443 0.948982 12.979021 1469.534408 289 | CACGC 78.137316 3.180457 2.105572 0.848075 12.979021 1758.616290 290 | CACGG 79.718920 3.180457 2.377608 1.017629 12.979021 1388.834651 291 | CACGT 80.278731 3.180457 2.106240 0.848478 12.979021 1277.791946 292 | CACTA 86.856587 2.731261 1.801203 0.775216 9.723928 1484.373714 293 | CACTC 85.308154 2.731261 1.688175 0.703405 9.723928 1189.491190 
294 | CACTG 85.744053 2.731261 1.876303 0.824202 9.723928 2409.393454 295 | CACTT 85.070789 2.731261 1.677919 0.697004 9.723928 2368.561813 296 | CAGAA 108.596176 5.872282 5.282649 2.740912 19.622985 3625.629970 297 | CAGAC 107.090001 5.872282 5.245644 2.712162 19.622985 1899.126476 298 | CAGAG 106.732949 5.872282 5.680886 3.056621 19.622985 1836.549513 299 | CAGAT 112.383806 5.872282 6.272736 3.546524 19.622985 2876.280736 300 | CAGCA 108.254756 3.017274 2.765615 1.207565 14.506225 4869.775545 301 | CAGCC 103.968265 3.017274 2.531903 1.057775 14.506225 2656.190332 302 | CAGCG 109.185090 3.017274 3.613962 1.803841 14.506225 3449.455462 303 | CAGCT 112.049883 3.017274 2.602147 1.102099 14.506225 3106.672040 304 | CAGGA 98.378248 7.840309 5.379264 2.837322 19.335318 1769.012188 305 | CAGGC 114.132204 7.840309 7.841637 4.993838 19.335318 3130.662990 306 | CAGGG 89.169347 7.840309 4.592939 2.238517 19.335318 1237.686302 307 | CAGGT 115.661044 7.840309 5.914644 3.271276 19.335318 2226.472508 308 | CAGTA 123.504210 4.029736 4.416409 2.389792 15.083004 2166.038956 309 | CAGTC 118.298104 4.029736 3.837268 1.935483 15.083004 1832.477867 310 | CAGTG 119.527728 4.029736 5.353137 3.189104 15.083004 1965.922499 311 | CAGTT 122.772197 4.029736 4.026375 2.080307 15.083004 3265.405641 312 | CATAA 89.904186 6.278645 2.495514 1.090552 13.067338 2686.049783 313 | CATAC 90.479100 6.278645 3.067984 1.486572 13.067338 1878.567602 314 | CATAG 91.261151 6.278645 2.738382 1.253567 13.067338 1217.320074 315 | CATAT 89.582788 6.278645 2.497244 1.091687 13.067338 2541.434568 316 | CATCA 87.168862 5.702234 2.294284 0.867595 16.043813 5098.555341 317 | CATCC 87.391196 5.702234 2.890353 1.226797 16.043813 2762.553234 318 | CATCG 88.224484 5.702234 2.550898 1.017152 16.043813 2977.982015 319 | CATCT 88.359730 5.702234 2.621305 1.059553 16.043813 3365.512023 320 | CATGA 84.285822 3.868907 2.436705 0.992742 14.680350 2529.360588 321 | CATGC 78.230209 3.868907 2.300356 0.910593 14.680350 1824.621208 
322 | CATGG 85.730638 3.868907 2.487348 1.023851 14.680350 2130.954295 323 | CATGT 79.728396 3.868907 2.069186 0.776839 14.680350 1841.035371 324 | CATTA 90.584171 5.532156 1.982617 0.711480 15.395424 3612.918233 325 | CATTC 89.921897 5.532156 2.241653 0.855375 15.395424 2853.978431 326 | CATTG 90.610816 5.532156 2.264631 0.868561 15.395424 4162.409239 327 | CATTT 89.282133 5.532156 2.302247 0.890291 15.395424 5015.622537 328 | CCAAA 87.188010 3.018476 2.372149 1.100894 11.013742 4892.244713 329 | CCAAC 85.163730 3.018476 1.746204 0.695304 11.013742 2885.635435 330 | CCAAG 86.103450 3.018476 3.245886 1.762108 11.013742 2559.187254 331 | CCAAT 89.462193 3.018476 2.285102 1.040857 11.013742 3827.936575 332 | CCACA 75.596677 1.875816 1.379650 0.563908 8.258278 1892.023928 333 | CCACC 72.576022 1.875816 1.196921 0.455673 8.258278 2692.550144 334 | CCACG 75.294463 1.875816 1.520009 0.652116 8.258278 2011.740508 335 | CCACT 77.893144 1.875816 1.665998 0.748285 8.258278 2324.041448 336 | CCAGA 76.218670 5.093152 4.306598 2.045414 19.091520 1938.101683 337 | CCAGC 92.884026 5.093152 2.763029 1.051134 19.091520 6245.221353 338 | CCAGG 91.801754 5.093152 5.455771 2.916513 19.091520 2152.360990 339 | CCAGT 96.017075 5.093152 4.062032 1.873677 19.091520 3812.642329 340 | CCATA 86.439459 2.134613 2.014351 0.899899 10.092947 2549.599416 341 | CCATC 83.878091 2.134613 1.826165 0.776785 10.092947 3760.485333 342 | CCATG 82.776712 2.134613 2.063120 0.932777 10.092947 2301.817960 343 | CCATT 87.392322 2.134613 1.963139 0.865800 10.092947 4307.318338 344 | CCCAA 73.427269 2.107591 1.913524 0.930898 8.085333 2002.962873 345 | CCCAC 70.732523 2.107591 1.467221 0.625021 8.085333 1255.823777 346 | CCCAG 71.951577 2.107591 2.697803 1.558355 8.085333 1718.529548 347 | CCCAT 73.356310 2.107591 1.864123 0.895082 8.085333 2013.374821 348 | CCCCA 62.973194 2.070961 1.334907 0.615566 6.277753 1432.637735 349 | CCCCC 62.476770 2.070961 1.245223 0.554585 6.277753 514.037057 350 | CCCCG 63.753187 
2.070961 1.616744 0.820465 6.277753 698.687048 351 | CCCCT 64.584982 2.070961 1.257360 0.562714 6.277753 680.536042 352 | CCCGA 83.922531 2.778549 2.883917 1.392477 12.370080 1149.339985 353 | CCCGC 76.792394 2.778549 2.077335 0.851282 12.370080 1931.867154 354 | CCCGG 78.031696 2.778549 3.355897 1.747940 12.370080 1317.756878 355 | CCCGT 78.010037 2.778549 2.327277 1.009453 12.370080 1380.623288 356 | CCCTA 69.670969 2.349865 1.873841 0.928110 7.638337 562.324004 357 | CCCTC 68.726024 2.349865 1.482202 0.652923 7.638337 656.340526 358 | CCCTG 71.635619 2.349865 2.383712 1.331623 7.638337 1339.301164 359 | CCCTT 70.581909 2.349865 1.517908 0.676658 7.638337 1374.655742 360 | CCGAA 97.550106 3.990469 3.126700 1.266428 19.058895 2302.551454 361 | CCGAC 96.661828 3.990469 3.513625 1.508635 19.058895 1808.659676 362 | CCGAG 95.856533 3.990469 3.290162 1.367027 19.058895 1087.915637 363 | CCGAT 100.626525 3.990469 3.619836 1.577555 19.058895 2240.610738 364 | CCGCA 87.112345 2.600796 2.432410 1.046922 13.130508 2548.074982 365 | CCGCC 84.082353 2.600796 2.232288 0.920417 13.130508 2943.230360 366 | CCGCG 87.086242 2.600796 3.027411 1.453672 13.130508 2114.468033 367 | CCGCT 89.389588 2.600796 2.330703 0.981950 13.130508 2998.155747 368 | CCGGA 106.163531 4.981474 6.098712 3.084869 23.836415 2461.490843 369 | CCGGC 95.132647 4.981474 2.898081 1.010521 23.836415 2577.953877 370 | CCGGG 92.989032 4.981474 4.707729 2.092169 23.836415 1776.409902 371 | CCGGT 98.101357 4.981474 4.002018 1.639827 23.836415 3291.478446 372 | CCGTA 92.468273 3.049711 3.045405 1.514139 12.319804 1574.266050 373 | CCGTC 89.273835 3.049711 2.722250 1.279647 12.319804 2022.082258 374 | CCGTG 90.049000 3.049711 3.169233 1.607420 12.319804 1845.020903 375 | CCGTT 92.470035 3.049711 2.758225 1.305097 12.319804 2707.183307 376 | CCTAA 82.461064 2.631718 1.654790 0.734228 8.405566 1134.167454 377 | CCTAC 79.054347 2.631718 1.509888 0.639932 8.405566 856.043619 378 | CCTAG 83.949886 2.631718 2.517204 
1.377508 8.405566 431.558224 379 | CCTAT 82.627592 2.631718 1.652903 0.732972 8.405566 1328.910015 380 | CCTCA 74.242313 2.848865 1.310813 0.524704 8.180752 1542.507724 381 | CCTCC 72.353229 2.848865 1.441780 0.605274 8.180752 1068.713315 382 | CCTCG 75.009135 2.848865 1.518296 0.654091 8.180752 1077.234920 383 | CCTCT 75.306135 2.848865 1.436636 0.602037 8.180752 1386.101709 384 | CCTGA 94.039444 2.850220 2.707101 1.205751 13.645766 3383.163516 385 | CCTGC 84.197292 2.850220 1.839179 0.675207 13.645766 3179.348620 386 | CCTGG 88.236624 2.850220 2.887041 1.327946 13.645766 2564.411420 387 | CCTGT 86.870090 2.850220 2.197495 0.881846 13.645766 2277.197205 388 | CCTTA 75.795766 2.416143 1.358772 0.586371 7.296159 1501.573515 389 | CCTTC 75.319996 2.416143 1.267842 0.528507 7.296159 2140.525274 390 | CCTTG 77.655142 2.416143 1.708836 0.826996 7.296159 1799.838475 391 | CCTTT 76.374573 2.416143 1.266334 0.527564 7.296159 2671.383114 392 | CGAAA 112.586708 5.559623 4.732049 2.507965 16.846307 3019.369928 393 | CGAAC 111.470647 5.559623 5.855333 3.452034 16.846307 1923.354079 394 | CGAAG 109.772606 5.559623 4.649157 2.442356 16.846307 2436.636199 395 | CGAAT 115.648356 5.559623 4.020065 1.963800 16.846307 2114.618148 396 | CGACA 109.269229 4.794395 7.260920 4.970279 15.495778 1647.137641 397 | CGACC 110.091256 4.794395 7.508789 5.226948 15.495778 1108.066545 398 | CGACG 110.922300 4.794395 7.799351 5.533260 15.495778 1479.310449 399 | CGACT 113.970531 4.794395 6.962340 4.666874 15.495778 1171.052768 400 | CGAGA 116.155274 5.691493 3.765166 1.771853 17.001874 1281.327757 401 | CGAGC 110.677177 5.691493 5.030248 2.736123 17.001874 1167.611882 402 | CGAGG 111.035628 5.691493 5.426046 3.065327 17.001874 889.991916 403 | CGAGT 112.737434 5.691493 4.299560 2.162159 17.001874 980.082573 404 | CGATA 117.344639 5.101816 5.278250 2.627026 21.307914 2484.957864 405 | CGATC 117.717234 5.101816 5.634945 2.897771 21.307914 1975.908171 406 | CGATG 114.129926 5.101816 7.802129 4.721165 
21.307914 2811.354842 407 | CGATT 120.276539 5.101816 5.584144 2.858673 21.307914 2094.421771 408 | CGCAA 99.447505 3.856268 4.636846 2.492936 16.041533 2127.572452 409 | CGCAC 101.875718 3.856268 4.680924 2.528567 16.041533 1533.262696 410 | CGCAG 97.161063 3.856268 4.019517 2.012046 16.041533 2025.980540 411 | CGCAT 99.569561 3.856268 4.452814 2.346005 16.041533 1845.173434 412 | CGCCA 94.993064 3.291378 4.237438 2.165072 16.231747 3327.782476 413 | CGCCC 96.312384 3.291378 4.573603 2.427756 16.231747 1401.876805 414 | CGCCG 95.605523 3.291378 4.181185 2.122103 16.231747 2549.063741 415 | CGCCT 96.759042 3.291378 4.412221 2.300399 16.231747 1927.679989 416 | CGCGA 90.738766 8.394211 4.578790 2.741734 12.770314 2005.880314 417 | CGCGC 90.608398 8.394211 5.807892 3.916761 12.770314 3100.407745 418 | CGCGG 92.975641 8.394211 4.477048 2.650860 12.770314 2032.606948 419 | CGCGT 90.718377 8.394211 5.082679 3.206552 12.770314 1998.906913 420 | CGCTA 102.056437 3.551179 3.141956 1.283747 18.820938 1525.965210 421 | CGCTC 102.446194 3.551179 3.714141 1.649935 18.820938 1324.945488 422 | CGCTG 101.736582 3.551179 3.753717 1.676377 18.820938 3645.218228 423 | CGCTT 102.109079 3.551179 4.089645 1.906373 18.820938 2077.764045 424 | CGGAA 123.966612 3.173751 3.526928 1.788432 13.716547 2112.678066 425 | CGGAC 120.901628 3.173751 3.102081 1.475222 13.716547 1004.360409 426 | CGGAG 122.923242 3.173751 3.649564 1.882518 13.716547 822.753420 427 | CGGAT 128.823294 3.173751 3.444719 1.726269 13.716547 1967.756450 428 | CGGCA 106.600732 5.310878 3.339519 1.529702 15.916164 2661.351536 429 | CGGCC 103.668104 5.310878 3.603111 1.714341 15.916164 915.878159 430 | CGGCG 107.829566 5.310878 4.360755 2.282566 15.916164 3374.232082 431 | CGGCT 110.544478 5.310878 3.367072 1.548672 15.916164 1359.167676 432 | CGGGA 115.555709 4.047423 3.638061 1.369149 25.686727 1134.630410 433 | CGGGC 113.737989 4.047423 6.304047 3.123020 25.686727 1447.568601 434 | CGGGG 112.661902 4.047423 4.557772 
1.919882 25.686727 857.814709 435 | CGGGT 113.905389 4.047423 4.661891 1.986044 25.686727 1235.657897 436 | CGGTA 121.853626 3.368228 3.917266 1.774578 19.087935 3010.917380 437 | CGGTC 117.184886 3.368228 3.305269 1.375406 19.087935 1654.784663 438 | CGGTG 117.991203 3.368228 4.670763 2.310480 19.087935 3052.888845 439 | CGGTT 120.767331 3.368228 3.553411 1.533164 19.087935 2577.371721 440 | CGTAA 98.858970 8.555862 5.194616 2.940211 16.214488 2088.898068 441 | CGTAC 102.949287 8.555862 6.386287 4.007938 16.214488 1216.965968 442 | CGTAG 98.638064 8.555862 4.790857 2.604163 16.214488 1123.854198 443 | CGTAT 101.566811 8.555862 5.414689 3.129022 16.214488 1780.498824 444 | CGTCA 99.903703 6.071202 5.885945 3.162489 20.388755 2449.187121 445 | CGTCC 102.207827 6.071202 6.581329 3.739169 20.388755 1132.548201 446 | CGTCG 100.204250 6.071202 6.227762 3.441935 20.388755 1880.964505 447 | CGTCT 102.027027 6.071202 6.409075 3.593335 20.388755 1535.211041 448 | CGTGA 93.869948 9.689941 4.259594 2.622314 11.239198 1844.184786 449 | CGTGC 89.889788 9.689941 6.073943 4.465174 11.239198 2006.629714 450 | CGTGG 94.809034 9.689941 4.471538 2.820445 11.239198 1887.587504 451 | CGTGT 91.659335 9.689941 4.433867 2.784879 11.239198 1087.538937 452 | CGTTA 98.213897 7.489815 4.231368 1.934180 20.251096 2893.241790 453 | CGTTC 100.912595 7.489815 4.993030 2.479260 20.251096 2167.704244 454 | CGTTG 97.807475 7.489815 4.645839 2.225215 20.251096 3257.906710 455 | CGTTT 99.903576 7.489815 5.017591 2.497576 20.251096 3314.452778 456 | CTAAA 94.001495 3.040826 1.896177 0.825371 10.007805 2895.034320 457 | CTAAC 91.503304 3.040826 1.787041 0.755148 10.007805 1557.943046 458 | CTAAG 91.476358 3.040826 2.305529 1.106589 10.007805 1321.268552 459 | CTAAT 96.700470 3.040826 2.059231 0.934089 10.007805 2379.047283 460 | CTACA 80.292630 2.102993 1.511375 0.637308 8.499982 1769.950034 461 | CTACC 76.498772 2.102993 1.388713 0.561319 8.499982 1942.734955 462 | CTACG 80.581504 2.102993 1.671816 
0.741436 8.499982 1503.601098 463 | CTACT 83.468721 2.102993 1.694469 0.756557 8.499982 1566.770236 464 | CTAGA 107.082810 3.781763 3.397808 1.503231 17.359805 1382.451764 465 | CTAGC 97.296107 3.781763 2.580000 0.994620 17.359805 1128.282954 466 | CTAGG 96.478532 3.781763 3.442112 1.532728 17.359805 384.437571 467 | CTAGT 99.587554 3.781763 2.660337 1.041436 17.359805 1021.699158 468 | CTATA 94.690569 2.632201 2.402889 1.181102 9.945506 1487.482400 469 | CTATC 91.526294 2.632201 2.178663 1.019698 9.945506 2032.145649 470 | CTATG 90.280097 2.632201 2.535859 1.280484 9.945506 1419.149124 471 | CTATT 95.551739 2.632201 2.243643 1.065657 9.945506 2720.145050 472 | CTCAA 79.126807 2.300504 1.508117 0.684891 7.312444 2833.180906 473 | CTCAC 76.729095 2.300504 1.318890 0.560120 7.312444 1307.623412 474 | CTCAG 77.484860 2.300504 2.026493 1.066809 7.312444 1377.278917 475 | CTCAT 79.739756 2.300504 1.458820 0.651586 7.312444 2085.702751 476 | CTCCA 69.759632 2.298440 1.353582 0.603233 6.815280 2614.620063 477 | CTCCC 67.633523 2.298440 1.295055 0.564535 6.815280 896.389676 478 | CTCCG 70.391059 2.298440 1.461016 0.676458 6.815280 1296.436948 479 | CTCCT 71.318115 2.298440 1.288264 0.560100 6.815280 1353.794852 480 | CTCGA 86.233646 2.302527 2.303768 1.026969 11.593152 1261.401648 481 | CTCGC 79.792592 2.302527 1.711379 0.657535 11.593152 1234.031908 482 | CTCGG 82.389078 2.302527 2.186223 0.949382 11.593152 789.206745 483 | CTCGT 81.943681 2.302527 1.784260 0.699981 11.593152 1037.112980 484 | CTCTA 79.847504 2.065374 1.612528 0.731478 7.836445 1441.438267 485 | CTCTC 77.342469 2.065374 1.464088 0.632836 7.836445 1559.262017 486 | CTCTG 79.002204 2.065374 1.663752 0.766608 7.836445 2073.426756 487 | CTCTT 79.011443 2.065374 1.383244 0.581151 7.836445 2952.035624 488 | CTGAA 107.927419 4.487612 3.426347 1.583482 16.042343 4423.225953 489 | CTGAC 107.161899 4.487612 3.379766 1.551301 16.042343 3004.864309 490 | CTGAG 106.042371 4.487612 3.442449 1.594658 16.042343 
1329.883383 491 | CTGAT 111.640205 4.487612 3.536619 1.660537 16.042343 4622.078315 492 | CTGCA 94.883447 3.125398 2.431234 1.159289 10.692942 3146.176270 493 | CTGCC 90.931381 3.125398 2.198176 0.996656 10.692942 3023.657766 494 | CTGCG 94.040237 3.125398 3.139345 1.701022 10.692942 2563.728906 495 | CTGCT 96.865308 3.125398 2.300520 1.067064 10.692942 4920.407640 496 | CTGGA 108.345742 3.994511 3.036507 1.212055 19.058043 4450.291442 497 | CTGGC 102.611198 3.994511 3.828919 1.716228 19.058043 3690.962831 498 | CTGGG 101.643559 3.994511 3.013475 1.198291 19.058043 2464.014858 499 | CTGGT 105.223430 3.994511 2.787019 1.065788 19.058043 5124.611477 500 | CTGTA 102.056314 3.775722 3.297250 1.834097 10.656408 1940.340463 501 | CTGTC 98.483746 3.775722 3.017965 1.606075 10.656408 2085.705224 502 | CTGTG 99.727199 3.775722 3.906251 2.365020 10.656408 1863.649165 503 | CTGTT 102.176860 3.775722 3.056487 1.636923 10.656408 3848.809667 504 | CTTAA 84.323058 2.458906 1.466977 0.630103 7.951444 2175.786116 505 | CTTAC 81.759819 2.458906 1.477119 0.636649 7.951444 1500.330712 506 | CTTAG 85.063590 2.458906 1.942577 0.960161 7.951444 1088.123648 507 | CTTAT 84.653150 2.458906 1.464884 0.628756 7.951444 2613.767895 508 | CTTCA 77.661841 2.072592 1.229141 0.491387 7.690558 3766.415916 509 | CTTCC 74.217828 2.072592 1.244632 0.500706 7.690558 3306.486935 510 | CTTCG 78.028442 2.072592 1.341227 0.560112 7.690558 2302.696950 511 | CTTCT 78.042195 2.072592 1.262908 0.511775 7.690558 3316.330902 512 | CTTGA 91.802992 2.649410 2.262626 0.952944 12.755660 3702.894918 513 | CTTGC 84.545291 2.649410 1.687037 0.613529 12.755660 2220.947174 514 | CTTGG 88.300823 2.649410 2.242201 0.940071 12.755660 2368.845909 515 | CTTGT 86.773103 2.649410 1.887261 0.725933 12.755660 2826.396148 516 | CTTTA 79.438553 1.969599 1.327055 0.566269 7.288230 3398.135058 517 | CTTTC 77.724359 1.969599 1.255477 0.521077 7.288230 4031.244182 518 | CTTTG 80.359399 1.969599 1.413892 0.622749 7.288230 4353.470903 519 
| CTTTT 79.706453 1.969599 1.172468 0.470262 7.288230 4496.605103 520 | GAAAA 106.417182 2.676522 2.563890 1.194760 11.806934 6064.863536 521 | GAAAC 105.923314 2.676522 2.607963 1.225699 11.806934 3512.904939 522 | GAAAG 103.812292 2.676522 3.273844 1.723925 11.806934 3481.504374 523 | GAAAT 111.123449 2.676522 2.900775 1.437813 11.806934 4679.321893 524 | GAACA 95.518188 3.449749 2.926021 1.319841 14.381008 3517.915862 525 | GAACC 94.695014 3.449749 3.836669 1.981695 14.381008 1797.174764 526 | GAACG 96.735979 3.449749 3.681157 1.862438 14.381008 2105.897921 527 | GAACT 100.790006 3.449749 3.047814 1.403098 14.381008 2524.762495 528 | GAAGA 105.357780 4.062744 4.730221 2.148448 22.929501 5620.511029 529 | GAAGC 100.441004 4.062744 5.543003 2.725338 22.929501 2852.191655 530 | GAAGG 105.256818 4.062744 4.143789 1.761567 22.929501 2737.085099 531 | GAAGT 103.994043 4.062744 4.433764 1.949672 22.929501 3166.922213 532 | GAATA 111.538011 3.108588 2.945776 1.485132 11.589620 3613.115794 533 | GAATC 109.543621 3.108588 2.506319 1.165520 11.589620 2882.819122 534 | GAATG 107.509864 3.108588 3.897696 2.260359 11.589620 2523.178290 535 | GAATT 112.107008 3.108588 2.529318 1.181601 11.589620 4085.787513 536 | GACAA 80.850283 3.181436 2.633693 1.419808 9.062258 3228.934011 537 | GACAC 84.444571 3.181436 3.202162 1.903473 9.062258 1248.135184 538 | GACAG 80.987648 3.181436 3.068166 1.785254 9.062258 2026.584043 539 | GACAT 82.607304 3.181436 2.712317 1.483859 9.062258 1967.440824 540 | GACCA 79.015681 2.812500 3.106261 1.982485 7.625947 1871.141748 541 | GACCC 79.175974 2.812500 3.278525 2.149665 7.625947 1009.769586 542 | GACCG 79.366723 2.812500 2.837055 1.730433 7.625947 1270.492472 543 | GACCT 79.904812 2.812500 3.435133 2.305518 7.625947 1053.967129 544 | GACGA 75.702193 3.180457 3.502995 1.819861 12.979021 3419.704886 545 | GACGC 73.050304 3.180457 3.298164 1.662599 12.979021 2737.414944 546 | GACGG 77.571244 3.180457 3.520776 1.833735 12.979021 2468.628082 547 | GACGT 
75.245744 3.180457 2.928679 1.391191 12.979021 2278.508251 548 | GACTA 90.235941 2.731261 2.900788 1.584357 9.723928 1047.694430 549 | GACTC 88.671729 2.731261 3.020328 1.683295 9.723928 1088.892632 550 | GACTG 88.632815 2.731261 2.993266 1.660723 9.723928 1383.562416 551 | GACTT 89.122930 2.731261 3.219869 1.852833 9.723928 1748.850515 552 | GAGAA 125.757042 5.872282 4.617947 2.240221 19.622985 3246.105510 553 | GAGAC 123.324763 5.872282 4.408124 2.089287 19.622985 1100.098182 554 | GAGAG 124.524117 5.872282 4.928056 2.469625 19.622985 1519.203426 555 | GAGAT 130.181950 5.872282 4.173649 1.924825 19.622985 2492.415741 556 | GAGCA 107.012728 3.017274 3.909844 2.029841 14.506225 2244.005065 557 | GAGCC 103.081095 3.017274 3.625457 1.812454 14.506225 1225.562129 558 | GAGCG 109.620464 3.017274 4.225319 2.280406 14.506225 1958.682001 559 | GAGCT 112.005913 3.017274 3.397016 1.643877 14.506225 1454.492212 560 | GAGGA 112.671894 7.840309 5.826620 3.198522 19.335318 1947.342189 561 | GAGGC 113.935395 7.840309 8.042163 5.186611 19.335318 1362.777938 562 | GAGGG 115.248936 7.840309 5.686848 3.084124 19.335318 805.283760 563 | GAGGT 112.282643 7.840309 6.740174 3.979522 19.335318 1661.960018 564 | GAGTA 123.381585 4.029736 4.545266 2.495140 15.083004 1460.780502 565 | GAGTC 117.524297 4.029736 4.289581 2.287591 15.083004 1065.873016 566 | GAGTG 120.912293 4.029736 5.970699 3.756591 15.083004 1133.655181 567 | GAGTT 123.901001 4.029736 4.179541 2.200134 15.083004 2350.471359 568 | GATAA 88.342579 6.278645 4.075789 2.276271 13.067338 5051.178572 569 | GATAC 94.910185 6.278645 5.220817 3.300001 13.067338 1989.070058 570 | GATAG 90.963031 6.278645 4.034700 2.241937 13.067338 2357.505209 571 | GATAT 88.946304 6.278645 3.979246 2.195875 13.067338 4409.720981 572 | GATCA 93.448757 5.702234 4.355474 2.269339 16.043813 2860.555774 573 | GATCC 95.140680 5.702234 4.912990 2.718724 16.043813 1978.403091 574 | GATCG 95.150868 5.702234 4.498268 2.381848 16.043813 1867.749210 575 | GATCT 
95.383665 5.702234 4.914661 2.720111 16.043813 2180.481654 576 | GATGA 79.435190 3.868907 3.768753 1.909538 14.680350 7350.249616 577 | GATGC 75.419335 3.868907 3.801752 1.934673 14.680350 5049.578674 578 | GATGG 82.943318 3.868907 3.941617 2.042412 14.680350 5279.477951 579 | GATGT 76.838450 3.868907 3.348317 1.599085 14.680350 4909.196272 580 | GATTA 96.012405 5.532156 3.728159 1.834618 15.395424 3013.954489 581 | GATTC 96.042397 5.532156 4.133107 2.141505 15.395424 2650.811719 582 | GATTG 96.095711 5.532156 4.393003 2.346640 15.395424 2696.856181 583 | GATTT 94.713886 5.532156 4.715570 2.609789 15.395424 4498.573668 584 | GCAAA 84.463941 3.018476 2.460582 1.163025 11.013742 4830.747978 585 | GCAAC 82.484518 3.018476 2.097235 0.915173 11.013742 2933.923070 586 | GCAAG 81.863290 3.018476 3.158322 1.691286 11.013742 2341.358599 587 | GCAAT 86.302178 3.018476 2.458503 1.161552 11.013742 3945.757339 588 | GCACA 73.145516 1.875816 2.158359 1.103420 8.258278 1464.391350 589 | GCACC 70.716194 1.875816 2.274472 1.193648 8.258278 1638.330109 590 | GCACG 73.836896 1.875816 2.129364 1.081260 8.258278 1659.460445 591 | GCACT 77.182899 1.875816 2.323316 1.232303 8.258278 1362.025021 592 | GCAGA 99.930045 5.093152 4.947858 2.518869 19.091520 2184.467790 593 | GCAGC 89.545715 5.093152 2.795868 1.069928 19.091520 3834.018355 594 | GCAGG 87.860142 5.093152 4.906568 2.487405 19.091520 2488.448075 595 | GCAGT 91.458345 5.093152 3.739288 1.654867 19.091520 2561.434012 596 | GCATA 82.807560 2.134613 2.145270 0.989040 10.092947 1638.958133 597 | GCATC 80.540925 2.134613 2.106158 0.962116 10.092947 2392.591853 598 | GCATG 80.707449 2.134613 2.236117 1.052526 10.092947 1965.733898 599 | GCATT 85.122196 2.134613 2.037731 0.915611 10.092947 2863.466729 600 | GCCAA 73.255993 2.107591 2.175304 1.128317 8.085333 2970.362878 601 | GCCAC 71.672481 2.107591 1.778288 0.833977 8.085333 2164.715272 602 | GCCAG 71.083389 2.107591 2.857607 1.698849 8.085333 3229.596627 603 | GCCAT 73.701553 2.107591 
2.231099 1.172004 8.085333 2895.460646 604 | GCCCA 64.626903 2.070961 2.508123 1.585335 6.277753 1470.504617 605 | GCCCC 64.850173 2.070961 2.644546 1.716424 6.277753 754.926055 606 | GCCCG 65.639915 2.070961 2.820310 1.890354 6.277753 1336.171575 607 | GCCCT 66.352446 2.070961 2.200408 1.302725 6.277753 1086.318803 608 | GCCGA 81.386803 2.778549 2.771539 1.311884 12.370080 2237.056128 609 | GCCGC 76.262836 2.778549 2.236935 0.951248 12.370080 3124.641670 610 | GCCGG 75.728029 2.778549 3.283784 1.691904 12.370080 2034.649775 611 | GCCGT 76.982950 2.778549 2.377655 1.042406 12.370080 2287.625385 612 | GCCTA 68.430678 2.349865 2.776262 1.673752 7.638337 815.999847 613 | GCCTC 67.986250 2.349865 2.389797 1.336725 7.638337 927.512060 614 | GCCTG 69.957644 2.349865 2.853710 1.744277 7.638337 2449.573236 615 | GCCTT 70.994549 2.349865 2.110170 1.109115 7.638337 1807.234268 616 | GCGAA 92.586828 3.990469 3.988590 1.824653 19.058895 2732.174754 617 | GCGAC 91.214195 3.990469 4.129989 1.922536 19.058895 2076.296318 618 | GCGAG 90.142127 3.990469 4.064351 1.876886 19.058895 1415.622182 619 | GCGAT 95.899993 3.990469 4.304052 2.045349 19.058895 3049.425857 620 | GCGCA 80.887105 2.600796 3.085305 1.495569 13.130508 3300.153662 621 | GCGCC 78.753951 2.600796 2.776764 1.276932 13.130508 2941.406654 622 | GCGCG 80.592982 2.600796 2.945482 1.395063 13.130508 2758.529635 623 | GCGCT 83.205450 2.600796 2.697283 1.222501 13.130508 3122.506194 624 | GCGGA 100.670666 4.981474 5.847066 2.895920 23.836415 2104.013670 625 | GCGGC 90.293090 4.981474 3.464390 1.320747 23.836415 3916.060025 626 | GCGGG 87.603071 4.981474 4.554573 1.990908 23.836415 2131.027440 627 | GCGGT 92.738763 4.981474 3.979525 1.626022 23.836415 2718.177811 628 | GCGTA 84.849331 3.049711 3.396120 1.783087 12.319804 2081.507579 629 | GCGTC 82.777181 3.049711 3.012186 1.489432 12.319804 2144.305328 630 | GCGTG 83.373080 3.049711 3.349647 1.746613 12.319804 2148.451746 631 | GCGTT 86.636417 3.049711 2.884196 1.395518 
12.319804 3032.343822 632 | GCTAA 84.404682 2.631718 1.938036 0.930592 8.405566 2897.377350 633 | GCTAC 85.335403 2.631718 2.333022 1.229121 8.405566 1411.118105 634 | GCTAG 84.436792 2.631718 2.620921 1.463516 8.405566 609.335945 635 | GCTAT 84.827557 2.631718 2.037129 1.002869 8.405566 2251.681343 636 | GCTCA 76.968120 2.848865 2.089163 1.055752 8.180752 2216.400211 637 | GCTCC 78.882359 2.848865 2.815770 1.651958 8.180752 1292.670000 638 | GCTCG 78.058375 2.848865 2.308342 1.226178 8.180752 1358.819376 639 | GCTCT 80.104367 2.848865 2.437006 1.330112 8.180752 1682.642544 640 | GCTGA 89.958937 2.850220 2.621315 1.148894 13.645766 4498.895497 641 | GCTGC 83.434922 2.850220 2.117606 0.834197 13.645766 4284.128421 642 | GCTGG 85.531612 2.850220 2.696813 1.198885 13.645766 4763.542923 643 | GCTGT 84.347111 2.850220 2.181101 0.871997 13.645766 3223.196796 644 | GCTTA 77.596356 2.416143 1.849624 0.931275 7.296159 1792.494876 645 | GCTTC 77.979596 2.416143 1.936646 0.997765 7.296159 2537.482077 646 | GCTTG 79.568036 2.416143 2.227782 1.231011 7.296159 1994.145605 647 | GCTTT 80.135777 2.416143 2.025221 1.066992 7.296159 3611.867318 648 | GGAAA 121.472353 5.559623 5.317176 2.987234 16.846307 4512.183994 649 | GGAAC 121.878077 5.559623 6.193709 3.755553 16.846307 2393.121968 650 | GGAAG 115.762850 5.559623 4.933937 2.670165 16.846307 3630.476096 651 | GGAAT 123.503365 5.559623 4.755098 2.526311 16.846307 3098.276715 652 | GGACA 118.398012 4.794395 5.535750 3.308704 15.495778 1348.932903 653 | GGACC 119.234723 4.794395 5.317542 3.115012 15.495778 721.630137 654 | GGACG 120.922346 4.794395 5.394069 3.182498 15.495778 1277.855825 655 | GGACT 123.834135 4.794395 4.447412 2.382618 15.495778 1359.695760 656 | GGAGA 119.434686 5.691493 4.653709 2.434728 17.001874 2023.531067 657 | GGAGC 121.187789 5.691493 5.707241 3.306670 17.001874 1392.268637 658 | GGAGG 114.428791 5.691493 5.702497 3.302548 17.001874 1213.659166 659 | GGAGT 117.274779 5.691493 5.252696 2.919611 17.001874 
1529.951398 660 | GGATA 126.875295 5.101816 4.957220 2.391040 21.307914 2397.407348 661 | GGATC 127.167558 5.101816 4.812473 2.287083 21.307914 1968.188966 662 | GGATG 126.022338 5.101816 6.052727 3.225939 21.307914 2713.069910 663 | GGATT 129.827199 5.101816 4.553844 2.105216 21.307914 3284.345845 664 | GGCAA 108.942858 3.856268 4.597952 2.461635 16.041533 3167.910601 665 | GGCAC 111.254903 3.856268 3.988338 1.988680 16.041533 1802.399974 666 | GGCAG 105.659758 3.856268 4.394347 2.299951 16.041533 2273.329775 667 | GGCAT 109.069062 3.856268 4.747274 2.582519 16.041533 2266.411260 668 | GGCCA 103.130746 3.291378 4.590509 2.441229 16.231747 1579.708007 669 | GGCCC 105.110774 3.291378 4.124699 2.079245 16.231747 761.419325 670 | GGCCG 104.042571 3.291378 4.787337 2.599910 16.231747 964.368882 671 | GGCCT 105.481062 3.291378 4.476853 2.351130 16.231747 1336.797163 672 | GGCGA 92.444142 8.394211 6.439820 4.573090 12.770314 3058.318676 673 | GGCGC 100.100489 8.394211 8.062812 6.406620 12.770314 2204.526414 674 | GGCGG 98.786021 8.394211 5.791306 3.899995 12.770314 2737.648481 675 | GGCGT 95.655980 8.394211 7.289994 5.507950 12.770314 2168.133197 676 | GGCTA 110.693339 3.551179 4.270410 2.034154 18.820938 1272.894770 677 | GGCTC 111.329712 3.551179 4.070985 1.893340 18.820938 1260.268912 678 | GGCTG 111.283445 3.551179 4.332981 2.079024 18.820938 2707.489207 679 | GGCTT 112.465273 3.551179 4.685232 2.337631 18.820938 2277.379112 680 | GGGAA 120.765797 3.173751 3.752969 1.963089 13.716547 1754.973182 681 | GGGAC 118.289222 3.173751 3.573755 1.824168 13.716547 934.382738 682 | GGGAG 120.289092 3.173751 4.058212 2.207393 13.716547 792.035476 683 | GGGAT 124.022802 3.173751 3.422014 1.709229 13.716547 1710.823452 684 | GGGCA 106.356917 5.310878 5.533086 3.262360 15.916164 1715.375205 685 | GGGCC 104.099241 5.310878 6.281260 3.945944 15.916164 930.094979 686 | GGGCG 108.228545 5.310878 6.468788 4.123967 15.916164 1884.223186 687 | GGGCT 113.279865 5.310878 5.538696 3.267323 
15.916164 1463.948511 688 | GGGGA 114.799941 4.047423 4.149193 1.667597 25.686727 1329.785300 689 | GGGGC 113.915524 4.047423 5.419200 2.489134 25.686727 835.215396 690 | GGGGG 110.710467 4.047423 4.823425 2.090157 25.686727 540.172346 691 | GGGGT 113.895885 4.047423 4.708690 2.016024 25.686727 853.299159 692 | GGGTA 119.336919 3.368228 4.530068 2.206874 19.087935 1690.989124 693 | GGGTC 114.734501 3.368228 4.512707 2.194200 19.087935 1063.125315 694 | GGGTG 115.897733 3.368228 5.540892 2.985314 19.087935 1321.049679 695 | GGGTT 119.328531 3.368228 4.493927 2.180517 19.087935 2080.985585 696 | GGTAA 113.286729 8.555862 7.891453 5.505336 16.214488 3044.070734 697 | GGTAC 118.692017 8.555862 7.094734 4.693021 16.214488 1794.446668 698 | GGTAG 107.544746 8.555862 6.463121 4.080485 16.214488 1471.062567 699 | GGTAT 114.714143 8.555862 7.660231 5.265155 16.214488 2661.566100 700 | GGTCA 112.905071 6.071202 6.311940 3.511955 20.388755 2065.831874 701 | GGTCC 116.089711 6.071202 5.730173 3.037780 20.388755 788.912718 702 | GGTCG 115.677666 6.071202 6.185449 3.406917 20.388755 1450.207942 703 | GGTCT 117.089991 6.071202 6.131326 3.362298 20.388755 1307.427154 704 | GGTGA 94.587253 9.689941 6.522003 4.968254 11.239198 5356.245368 705 | GGTGC 93.689325 9.689941 9.280742 8.433471 11.239198 2821.140689 706 | GGTGG 99.471489 9.689941 6.691095 5.162715 11.239198 3044.693475 707 | GGTGT 93.039700 9.689941 7.541798 6.177954 11.239198 2645.383306 708 | GGTTA 109.619445 7.489815 6.114958 3.360206 20.251096 2110.370040 709 | GGTTC 114.367087 7.489815 6.228159 3.453944 20.251096 1776.515164 710 | GGTTG 109.940264 7.489815 7.013722 4.127609 20.251096 1838.056555 711 | GGTTT 112.498453 7.489815 7.168070 4.264608 20.251096 2925.841085 712 | GTAAA 88.783209 3.040826 2.224801 1.048981 10.007805 4282.972891 713 | GTAAC 87.408144 3.040826 2.082877 0.950224 10.007805 2366.278468 714 | GTAAG 85.907661 3.040826 2.658263 1.370022 10.007805 2303.982479 715 | GTAAT 91.846761 3.040826 2.449224 
1.211639 10.007805 3186.137854 716 | GTACA 76.340673 2.102993 2.009966 0.977404 8.499982 1623.569300 717 | GTACC 73.994145 2.102993 2.039155 0.998772 8.499982 1622.603237 718 | GTACG 77.413261 2.102993 2.246057 1.154575 8.499982 1390.681754 719 | GTACT 81.125916 2.102993 2.288085 1.187132 8.499982 1506.917391 720 | GTAGA 102.409502 3.781763 4.133818 2.017227 17.359805 1821.838128 721 | GTAGC 92.905249 3.781763 2.926078 1.201314 17.359805 1713.750008 722 | GTAGG 113.507644 3.781763 5.640812 3.215437 17.359805 556.556652 723 | GTAGT 93.937556 3.781763 2.929570 1.203465 17.359805 1765.609496 724 | GTATA 89.664332 2.632201 2.519324 1.267980 9.945506 1654.145871 725 | GTATC 86.997922 2.632201 2.145435 0.996459 9.945506 2506.740147 726 | GTATG 86.708170 2.632201 2.671982 1.384959 9.945506 1860.469108 727 | GTATT 91.860975 2.632201 2.389962 1.171583 9.945506 3347.274771 728 | GTCAA 77.558218 2.300504 1.894317 0.964157 7.312444 3275.636115 729 | GTCAC 76.200107 2.300504 1.815066 0.904289 7.312444 1670.875154 730 | GTCAG 75.633527 2.300504 2.424144 1.395746 7.312444 2161.049886 731 | GTCAT 78.480856 2.300504 1.748775 0.855204 7.312444 2917.467036 732 | GTCCA 68.750632 2.298440 1.986838 1.072758 6.815280 1673.286713 733 | GTCCC 68.271499 2.298440 2.832693 1.826238 6.815280 654.996832 734 | GTCCG 69.693728 2.298440 2.210680 1.259062 6.815280 1196.755532 735 | GTCCT 70.254407 2.298440 2.073982 1.144105 6.815280 1068.090799 736 | GTCGA 83.261071 2.302527 2.566331 1.207448 11.593152 2306.200438 737 | GTCGC 78.622211 2.302527 2.044662 0.858680 11.593152 2373.675108 738 | GTCGG 79.123155 2.302527 2.392615 1.086947 11.593152 1821.054626 739 | GTCGT 79.776852 2.302527 2.109033 0.899548 11.593152 2181.500800 740 | GTCTA 77.221483 2.065374 1.982815 0.997387 7.836445 1159.771142 741 | GTCTC 76.088681 2.065374 1.821022 0.877836 7.836445 1271.502889 742 | GTCTG 77.441233 2.065374 2.130705 1.111029 7.836445 2259.343127 743 | GTCTT 77.911303 2.065374 1.680259 0.778045 7.836445 2142.898698 
744 | GTGAA 101.502426 4.487612 4.144484 2.106550 16.042343 3634.022478 745 | GTGAC 99.690026 4.487612 4.273414 2.205609 16.042343 2283.091093 746 | GTGAG 99.325519 4.487612 4.073268 2.052488 16.042343 1650.108210 747 | GTGAT 105.215470 4.487612 4.196201 2.146103 16.042343 3313.434587 748 | GTGCA 87.074004 3.125398 3.638389 2.122341 10.692942 2374.595824 749 | GTGCC 83.630070 3.125398 3.471922 1.978365 10.692942 2332.337150 750 | GTGCG 87.121505 3.125398 3.784367 2.251341 10.692942 1887.723640 751 | GTGCT 90.013484 3.125398 3.430562 1.943118 10.692942 2508.126096 752 | GTGGA 103.597649 3.994511 4.006940 1.837300 19.058043 2559.709867 753 | GTGGC 96.907775 3.994511 4.282329 2.029929 19.058043 2701.068121 754 | GTGGG 94.433499 3.994511 3.916929 1.775740 19.058043 1630.294157 755 | GTGGT 98.902261 3.994511 3.740729 1.657277 19.058043 3702.727864 756 | GTGTA 93.808736 3.775722 4.099660 2.542824 10.656408 1350.724079 757 | GTGTC 90.407547 3.775722 3.755719 2.229637 10.656408 1865.952231 758 | GTGTG 90.872067 3.775722 4.538264 2.961619 10.656408 1443.268716 759 | GTGTT 95.076695 3.775722 3.762321 2.235519 10.656408 2822.543079 760 | GTTAA 83.143687 2.458906 1.678012 0.770849 7.951444 3710.238529 761 | GTTAC 82.192014 2.458906 1.905640 0.932907 7.951444 2064.417278 762 | GTTAG 82.234327 2.458906 2.082889 1.066047 7.951444 1466.314802 763 | GTTAT 83.687266 2.458906 1.699251 0.785531 7.951444 3878.929932 764 | GTTCA 76.776405 2.072592 1.720529 0.813793 7.690558 3519.997717 765 | GTTCC 75.950870 2.072592 2.330948 1.283276 7.690558 1804.342634 766 | GTTCG 76.911413 2.072592 1.896498 0.941780 7.690558 2458.296422 767 | GTTCT 77.377615 2.072592 1.802409 0.872572 7.690558 2710.312822 768 | GTTGA 88.484060 2.649410 2.529890 1.126681 12.755660 4761.625227 769 | GTTGC 81.946813 2.649410 1.952953 0.764163 12.755660 4185.274085 770 | GTTGG 84.702338 2.649410 2.326426 0.993534 12.755660 3574.561837 771 | GTTGT 83.618460 2.649410 1.996171 0.789669 12.755660 3963.616452 772 | GTTTA 
77.979258 1.969599 1.710607 0.828732 7.288230 2763.162662 773 | GTTTC 77.605310 1.969599 1.573070 0.730821 7.288230 3082.634894 774 | GTTTG 78.970582 1.969599 1.799658 0.894281 7.288230 3808.469812 775 | GTTTT 78.700758 1.969599 1.528245 0.699807 7.288230 5039.118428 776 | TAAAA 104.532801 2.676522 1.942220 0.787733 11.806934 4452.772373 777 | TAAAC 103.448060 2.676522 2.113529 0.894219 11.806934 2694.968464 778 | TAAAG 102.281548 2.676522 2.488709 1.142596 11.806934 3074.696204 779 | TAAAT 108.512552 2.676522 2.283751 1.004395 11.806934 3940.955794 780 | TAACA 93.834782 3.449749 2.273052 0.903690 14.381008 2302.307125 781 | TAACC 93.341858 3.449749 2.727665 1.187933 14.381008 1845.879122 782 | TAACG 95.297372 3.449749 2.781927 1.223556 14.381008 2739.952589 783 | TAACT 99.065298 3.449749 2.387941 0.973062 14.381008 2233.070692 784 | TAAGA 107.757941 4.062744 3.479162 1.355235 22.929501 2048.118936 785 | TAAGC 101.583416 4.062744 4.329073 1.881027 22.929501 1538.636480 786 | TAAGG 103.920470 4.062744 3.068701 1.122625 22.929501 1692.479404 787 | TAAGT 104.095042 4.062744 3.256467 1.227221 22.929501 1688.923264 788 | TAATA 108.459316 3.108588 2.437590 1.117909 11.589620 3211.021792 789 | TAATC 106.551687 3.108588 2.265432 1.001594 11.589620 2585.871039 790 | TAATG 104.662530 3.108588 3.494667 1.918996 11.589620 2863.540294 791 | TAATT 109.834489 3.108588 2.198860 0.957771 11.589620 3191.371511 792 | TACAA 83.037210 3.181436 1.915075 0.880361 9.062258 2970.376704 793 | TACAC 84.223213 3.181436 1.922525 0.885502 9.062258 1549.561383 794 | TACAG 79.554841 3.181436 1.959735 0.911334 9.062258 1976.494663 795 | TACAT 83.152532 3.181436 1.756628 0.773395 9.062258 1935.023183 796 | TACCA 77.842866 2.812500 1.536744 0.689851 7.625947 2972.220179 797 | TACCC 78.844230 2.812500 1.724200 0.819851 7.625947 1580.162667 798 | TACCG 78.268785 2.812500 1.710382 0.810014 7.625947 2556.584103 799 | TACCT 79.015265 2.812500 1.663611 0.777018 7.625947 2010.877750 800 | TACGA 79.560849 
3.180457 2.325688 0.984478 12.979021 1544.169633 801 | TACGC 78.112805 3.180457 2.277669 0.954146 12.979021 1856.911476 802 | TACGG 80.133722 3.180457 2.434272 1.054223 12.979021 1851.805393 803 | TACGT 79.237142 3.180457 2.000081 0.785146 12.979021 1653.097699 804 | TACTA 87.983707 2.731261 1.804486 0.777337 9.723928 1379.342327 805 | TACTC 87.806457 2.731261 1.854768 0.810053 9.723928 1456.010765 806 | TACTG 87.789603 2.731261 1.927322 0.858046 9.723928 2459.596448 807 | TACTT 88.099745 2.731261 1.779268 0.761099 9.723928 2766.044065 808 | TAGAA 122.169039 5.872282 4.126966 1.892622 19.622985 2341.574106 809 | TAGAC 120.421736 5.872282 3.418992 1.427133 19.622985 1113.084693 810 | TAGAG 121.126709 5.872282 4.486860 2.145514 19.622985 1560.692584 811 | TAGAT 126.659807 5.872282 4.326744 2.031699 19.622985 2096.391363 812 | TAGCA 105.355994 3.017274 2.690008 1.158385 14.506225 1630.134447 813 | TAGCC 101.168169 3.017274 2.659453 1.138705 14.506225 1253.087178 814 | TAGCG 105.732615 3.017274 3.626310 1.813093 14.506225 1587.160139 815 | TAGCT 109.433183 3.017274 2.653713 1.135020 14.506225 1376.987140 816 | TAGGA 100.227487 7.840309 3.768616 1.663785 19.335318 967.019000 817 | TAGGC 93.674860 7.840309 4.440649 2.128110 19.335318 785.928019 818 | TAGGG 89.436063 7.840309 3.547800 1.519718 19.335318 608.612262 819 | TAGGT 96.831170 7.840309 4.268533 2.005591 19.335318 1181.072449 820 | TAGTA 118.823116 4.029736 4.271479 2.273126 15.083004 1334.008701 821 | TAGTC 113.727764 4.029736 3.285160 1.533172 15.083004 1074.995106 822 | TAGTG 115.893989 4.029736 5.031972 2.906453 15.083004 1373.400886 823 | TAGTT 118.667582 4.029736 3.657399 1.801003 15.083004 1881.484147 824 | TATAA 92.840964 6.278645 3.104254 1.513011 13.067338 2685.984418 825 | TATAC 95.089351 6.278645 3.659238 1.936388 13.067338 2046.165132 826 | TATAG 92.782758 6.278645 2.995869 1.434467 13.067338 1855.348259 827 | TATAT 93.546451 6.278645 2.973504 1.418435 13.067338 3053.866752 828 | TATCA 92.764259 
5.702234 2.867902 1.212530 16.043813 4233.130900 829 | TATCC 94.256226 5.702234 3.349267 1.530280 16.043813 2634.639056 830 | TATCG 94.102591 5.702234 2.985266 1.287718 16.043813 3213.952359 831 | TATCT 94.806013 5.702234 3.100090 1.362723 16.043813 3344.132169 832 | TATGA 82.979728 3.868907 2.651923 1.127128 14.680350 2817.805477 833 | TATGC 78.952386 3.868907 2.928539 1.308002 14.680350 2282.528030 834 | TATGG 86.356502 3.868907 2.857455 1.260669 14.680350 2984.215287 835 | TATGT 80.199740 3.868907 2.538899 1.055845 14.680350 2363.089984 836 | TATTA 94.345850 5.532156 2.464658 0.986141 15.395424 3629.764370 837 | TATTC 95.059591 5.532156 2.801007 1.194746 15.395424 3860.163443 838 | TATTG 94.789647 5.532156 2.757972 1.167318 15.395424 4750.603133 839 | TATTT 94.778489 5.532156 2.928635 1.277327 15.395424 5605.179494 840 | TCAAA 87.611027 3.018476 1.950654 0.820924 11.013742 5491.795197 841 | TCAAC 85.786031 3.018476 1.575780 0.596040 11.013742 3933.253972 842 | TCAAG 85.704511 3.018476 2.727651 1.357425 11.013742 3085.467959 843 | TCAAT 89.199187 3.018476 1.951429 0.821413 11.013742 5405.575540 844 | TCACA 75.587554 1.875816 1.219448 0.468598 8.258278 2148.207246 845 | TCACC 74.443151 1.875816 1.184770 0.448752 8.258278 3479.587283 846 | TCACG 75.822226 1.875816 1.479329 0.626113 8.258278 1944.750327 847 | TCACT 79.431579 1.875816 1.413590 0.584844 8.258278 2492.854632 848 | TCAGA 100.980285 5.093152 5.154313 2.678157 19.091520 2146.738365 849 | TCAGC 90.483847 5.093152 2.376180 0.838298 19.091520 4882.281852 850 | TCAGG 89.105518 5.093152 4.664291 2.305464 19.091520 2451.169979 851 | TCAGT 93.137594 5.093152 3.463875 1.475445 19.091520 2979.301717 852 | TCATA 87.266543 2.134613 1.834043 0.781817 10.092947 2779.684630 853 | TCATC 85.069007 2.134613 1.687101 0.689768 10.092947 4630.076235 854 | TCATG 83.806031 2.134613 1.852088 0.793384 10.092947 2703.155851 855 | TCATT 88.355500 2.134613 1.715146 0.707038 10.092947 4989.865073 856 | TCCAA 74.332779 2.107591 
1.508238 0.651412 8.085333 3394.544673 857 | TCCAC 72.874417 2.107591 1.263382 0.499405 8.085333 2390.919831 858 | TCCAG 72.520894 2.107591 2.006100 0.999264 8.085333 2847.705515 859 | TCCAT 74.367230 2.107591 1.428439 0.600404 8.085333 3145.444205 860 | TCCCA 64.833964 2.070961 1.319211 0.604741 6.277753 2250.173841 861 | TCCCC 64.935394 2.070961 1.245314 0.554646 6.277753 1014.919091 862 | TCCCG 65.580357 2.070961 1.429618 0.682225 6.277753 1182.276229 863 | TCCCT 66.257182 2.070961 1.238428 0.550053 6.277753 1498.851016 864 | TCCGA 81.879199 2.778549 2.470945 1.104354 12.370080 1300.786618 865 | TCCGC 76.600631 2.778549 1.705683 0.633375 12.370080 2386.394467 866 | TCCGG 76.572635 2.778549 2.599230 1.191464 12.370080 2305.710210 867 | TCCGT 77.035604 2.778549 1.859255 0.720813 12.370080 1738.955091 868 | TCCTA 70.141326 2.349865 1.462755 0.640115 7.638337 943.085534 869 | TCCTC 70.827724 2.349865 1.259911 0.511694 7.638337 1822.198664 870 | TCCTG 71.639246 2.349865 1.741559 0.831588 7.638337 2074.880247 871 | TCCTT 73.284641 2.349865 1.254903 0.508646 7.638337 2501.957256 872 | TCGAA 96.866468 3.990469 3.003073 1.192065 19.058895 2859.934453 873 | TCGAC 96.268265 3.990469 3.179903 1.298889 19.058895 2116.306913 874 | TCGAG 94.813241 3.990469 2.976532 1.176297 19.058895 1442.164690 875 | TCGAT 99.037693 3.990469 3.278728 1.359907 19.058895 3480.361654 876 | TCGCA 87.165793 2.600796 2.345458 0.991290 13.130508 1748.771508 877 | TCGCC 84.427951 2.600796 2.137890 0.862655 13.130508 3344.523152 878 | TCGCG 86.827111 2.600796 2.860865 1.335381 13.130508 2190.159414 879 | TCGCT 88.706494 2.600796 2.200524 0.900842 13.130508 2816.936019 880 | TCGGA 102.029446 4.981474 5.308192 2.504953 23.836415 1440.991940 881 | TCGGC 92.919744 4.981474 2.696644 0.907016 23.836415 2685.753074 882 | TCGGG 90.954481 4.981474 3.891459 1.572347 23.836415 1164.121374 883 | TCGGT 95.286181 4.981474 3.334287 1.247050 23.836415 3515.504773 884 | TCGTA 91.752110 3.049711 2.760480 1.306698 
12.319804 1393.616694 885 | TCGTC 89.203426 3.049711 2.480724 1.113181 12.319804 2556.047323 886 | TCGTG 89.971282 3.049711 3.239375 1.661078 12.319804 1743.191021 887 | TCGTT 92.104410 3.049711 2.474749 1.109162 12.319804 2741.800831 888 | TCTAA 84.149385 2.631718 1.486206 0.624935 8.405566 2197.870428 889 | TCTAC 83.057752 2.631718 1.694130 0.760565 8.405566 1731.258612 890 | TCTAG 84.052304 2.631718 2.015208 0.986725 8.405566 1081.265673 891 | TCTAT 83.974139 2.631718 1.484515 0.623869 8.405566 2583.046700 892 | TCTCA 76.048058 2.848865 1.409451 0.585030 8.180752 2029.517486 893 | TCTCC 75.362909 2.848865 1.559349 0.680798 8.180752 2062.393647 894 | TCTCG 78.424879 2.848865 1.559342 0.680793 8.180752 1068.545844 895 | TCTCT 79.441550 2.848865 1.500360 0.642535 8.180752 2429.193974 896 | TCTGA 91.368585 2.850220 2.400721 1.006963 13.645766 3363.530141 897 | TCTGC 83.897234 2.850220 1.729013 0.615458 13.645766 3015.584966 898 | TCTGG 86.500177 2.850220 2.436861 1.029787 13.645766 3519.267519 899 | TCTGT 84.953592 2.850220 1.830268 0.670306 13.645766 2851.762187 900 | TCTTA 77.861940 2.416143 1.233987 0.507480 7.296159 1990.044092 901 | TCTTC 76.974940 2.416143 1.216972 0.497020 7.296159 3825.346978 902 | TCTTG 80.890077 2.416143 1.549553 0.714106 7.296159 2841.752937 903 | TCTTT 80.386068 2.416143 1.314452 0.557917 7.296159 4818.018667 904 | TGAAA 118.114917 5.559623 6.234451 3.792669 16.846307 5672.736083 905 | TGAAC 118.657969 5.559623 7.483045 4.987296 16.846307 3042.025990 906 | TGAAG 114.748297 5.559623 5.621684 3.247486 16.846307 4160.081308 907 | TGAAT 120.761633 5.559623 5.656638 3.277821 16.846307 4492.042148 908 | TGACA 117.047761 4.794395 7.150343 4.857174 15.495778 2206.386282 909 | TGACC 118.437055 4.794395 7.042169 4.747369 15.495778 1887.605841 910 | TGACG 118.741830 4.794395 7.492147 5.209580 15.495778 2453.083770 911 | TGACT 121.902530 4.794395 6.490613 4.200702 15.495778 1856.961374 912 | TGAGA 117.689804 5.691493 4.815772 2.563011 17.001874 
2387.821240 913 | TGAGC 117.855778 5.691493 6.734494 4.238470 17.001874 1687.461930 914 | TGAGG 114.260596 5.691493 5.948369 3.518426 17.001874 1514.968618 915 | TGAGT 116.262908 5.691493 5.491618 3.121059 17.001874 1730.011711 916 | TGATA 124.237335 5.101816 6.211792 3.353937 21.307914 3152.634457 917 | TGATC 124.545517 5.101816 6.565565 3.644498 21.307914 2343.139659 918 | TGATG 123.134584 5.101816 7.981222 4.884652 21.307914 3956.177695 919 | TGATT 127.734628 5.101816 6.180862 3.328918 21.307914 3948.433315 920 | TGCAA 106.024708 3.856268 4.662233 2.513437 16.041533 2599.676904 921 | TGCAC 108.558497 3.856268 4.346445 2.262447 16.041533 1349.556118 922 | TGCAG 103.316898 3.856268 4.120043 2.087996 16.041533 1973.401638 923 | TGCAT 106.106244 3.856268 4.580039 2.447264 16.041533 1919.358425 924 | TGCCA 101.347109 3.291378 4.038824 2.014651 16.231747 3101.738672 925 | TGCCC 102.953936 3.291378 4.008096 1.991703 16.231747 1491.833806 926 | TGCCG 102.070117 3.291378 3.848811 1.874163 16.231747 2787.390543 927 | TGCCT 102.970515 3.291378 4.224466 2.155138 16.231747 1716.036812 928 | TGCGA 94.552419 8.394211 5.377726 3.489774 12.770314 1835.174890 929 | TGCGC 98.581368 8.394211 6.980987 5.161483 12.770314 3136.301964 930 | TGCGG 98.673061 8.394211 5.299796 3.414192 12.770314 2375.559933 931 | TGCGT 96.635756 8.394211 5.914707 4.025307 12.770314 2018.915763 932 | TGCTA 107.943353 3.551179 3.389568 1.438454 18.820938 1799.993468 933 | TGCTC 108.757106 3.551179 3.710641 1.647604 18.820938 1938.522643 934 | TGCTG 108.066432 3.551179 3.832878 1.729684 18.820938 4344.833534 935 | TGCTT 109.232881 3.551179 4.032510 1.866562 18.820938 2680.367718 936 | TGGAA 120.392424 3.173751 3.362566 1.664883 13.716547 3772.662463 937 | TGGAC 118.046893 3.173751 3.152222 1.511134 13.716547 1714.532318 938 | TGGAG 120.172787 3.173751 3.583513 1.831644 13.716547 1889.615898 939 | TGGAT 125.168284 3.173751 3.232973 1.569570 13.716547 2693.086527 940 | TGGCA 104.828979 5.310878 4.316415 
2.247841 15.916164 2989.426462 941 | TGGCC 103.755501 5.310878 5.097647 2.884931 15.916164 1707.457962 942 | TGGCG 107.590784 5.310878 5.694623 3.406264 15.916164 4264.699004 943 | TGGCT 108.631122 5.310878 4.490811 2.385437 15.916164 2683.481606 944 | TGGGA 113.621342 4.047423 3.734012 1.423669 25.686727 2019.981380 945 | TGGGC 113.117956 4.047423 6.040356 2.929135 25.686727 1437.967727 946 | TGGGG 111.573080 4.047423 4.294491 1.755954 25.686727 1307.368296 947 | TGGGT 112.203445 4.047423 4.698157 2.009264 25.686727 1999.767949 948 | TGGTA 118.634757 3.368228 3.887914 1.754670 19.087935 3063.177091 949 | TGGTC 114.414051 3.368228 3.875581 1.746328 19.087935 2267.938459 950 | TGGTG 115.659191 3.368228 4.775644 2.388738 19.087935 4195.927445 951 | TGGTT 118.828514 3.368228 3.793919 1.691424 19.087935 3627.121737 952 | TGTAA 109.083260 8.555862 6.874206 4.475919 16.214488 2107.695765 953 | TGTAC 113.988004 8.555862 7.097988 4.696250 16.214488 1661.673499 954 | TGTAG 106.931210 8.555862 5.930585 3.586696 16.214488 1580.799624 955 | TGTAT 110.146507 8.555862 7.027544 4.626512 16.214488 2183.289209 956 | TGTCA 109.113377 6.071202 6.309393 3.509829 20.388755 2270.262394 957 | TGTCC 112.006575 6.071202 6.463931 3.639567 20.388755 1325.958427 958 | TGTCG 110.082064 6.071202 6.298507 3.500749 20.388755 2184.271247 959 | TGTCT 111.929174 6.071202 6.560635 3.721547 20.388755 2086.684003 960 | TGTGA 94.897960 9.689941 5.055932 3.391050 11.239198 2069.698125 961 | TGTGC 95.267558 9.689941 6.793745 5.281973 11.239198 2064.216908 962 | TGTGG 99.408797 9.689941 5.407165 3.750479 11.239198 2132.588647 963 | TGTGT 93.181833 9.689941 5.577706 3.929306 11.239198 1824.806388 964 | TGTTA 106.434881 7.489815 5.221768 2.651564 20.251096 2861.224822 965 | TGTTC 109.695268 7.489815 5.672306 3.002031 20.251096 3117.798588 966 | TGTTG 107.050463 7.489815 5.571732 2.922544 20.251096 4670.084192 967 | TGTTT 109.493410 7.489815 5.930712 3.209490 20.251096 4459.973555 968 | TTAAA 92.047985 
3.040826 1.771121 0.745080 10.007805 5064.587652 969 | TTAAC 90.306298 3.040826 1.667715 0.680790 10.007805 3370.715345 970 | TTAAG 89.640417 3.040826 2.147449 0.994752 10.007805 1856.045216 971 | TTAAT 94.218253 3.040826 1.936389 0.851764 10.007805 5117.129177 972 | TTACA 79.879570 2.102993 1.441496 0.593624 8.499982 2627.054234 973 | TTACC 76.727466 2.102993 1.309049 0.513718 8.499982 3677.355325 974 | TTACG 80.271204 2.102993 1.630208 0.713930 8.499982 2270.967371 975 | TTACT 84.199146 2.102993 1.536574 0.653313 8.499982 2772.474659 976 | TTAGA 103.759006 3.781763 3.347636 1.470059 17.359805 2915.612694 977 | TTAGC 94.883442 3.781763 2.445094 0.917637 17.359805 2219.643292 978 | TTAGG 92.383719 3.781763 3.088107 1.302465 17.359805 1090.835729 979 | TTAGT 95.991605 3.781763 2.422460 0.904925 17.359805 2128.527794 980 | TTATA 93.293356 2.632201 2.179288 1.020137 9.945506 3055.032922 981 | TTATC 90.317451 2.632201 1.835229 0.788355 9.945506 4872.752267 982 | TTATG 89.800603 2.632201 2.297016 1.103907 9.945506 3198.706167 983 | TTATT 94.204914 2.632201 2.017525 0.908688 9.945506 6793.336756 984 | TTCAA 80.104941 2.300504 1.407776 0.617688 7.312444 4939.288020 985 | TTCAC 77.152869 2.300504 1.248347 0.515788 7.312444 2643.169589 986 | TTCAG 75.457517 2.300504 1.661689 0.792124 7.312444 3856.169909 987 | TTCAT 80.433283 2.300504 1.336412 0.571320 7.312444 4347.981776 988 | TTCCA 70.425126 2.298440 1.283961 0.557296 6.815280 5265.496203 989 | TTCCC 68.771943 2.298440 1.381931 0.622282 6.815280 2036.961820 990 | TTCCG 70.811221 2.298440 1.374837 0.617497 6.815280 2898.080211 991 | TTCCT 72.025713 2.298440 1.290897 0.561818 6.815280 2680.085087 992 | TTCGA 84.697380 2.302527 2.173210 0.940918 11.593152 2795.060660 993 | TTCGC 80.794099 2.302527 1.665339 0.631180 11.593152 2657.652006 994 | TTCGG 81.445733 2.302527 2.063826 0.870781 11.593152 1649.366041 995 | TTCGT 81.561854 2.302527 1.695118 0.648185 11.593152 2762.883026 996 | TTCTA 79.453763 2.065374 1.437361 0.615586 
7.836445 2506.699673 997 | TTCTC 77.296677 2.065374 1.324093 0.544275 7.836445 2934.522931 998 | TTCTG 79.585705 2.065374 1.541031 0.683372 7.836445 3540.987693 999 | TTCTT 79.328157 2.065374 1.256885 0.503366 7.836445 5295.591733 1000 | TTGAA 104.815901 4.487612 3.159098 1.401880 16.042343 6846.775401 1001 | TTGAC 104.197468 4.487612 3.092976 1.358097 16.042343 3726.636886 1002 | TTGAG 102.991893 4.487612 3.108944 1.368628 16.042343 3107.786488 1003 | TTGAT 108.318693 4.487612 3.236773 1.453900 16.042343 6749.480656 1004 | TTGCA 92.627509 3.125398 2.307340 1.071812 10.692942 3644.677646 1005 | TTGCC 89.548110 3.125398 2.189044 0.990451 10.692942 4246.455061 1006 | TTGCG 92.522648 3.125398 2.881254 1.495628 10.692942 2679.931508 1007 | TTGCT 94.954930 3.125398 2.205828 1.001864 10.692942 5361.103338 1008 | TTGGA 105.726794 3.994511 2.914112 1.139515 19.058043 4318.796164 1009 | TTGGC 100.451207 3.994511 3.443299 1.463602 19.058043 3199.245740 1010 | TTGGG 98.912810 3.994511 2.777603 1.060391 19.058043 2727.639066 1011 | TTGGT 102.467791 3.994511 2.694289 1.013041 19.058043 5029.363875 1012 | TTGTA 99.353452 3.775722 3.007985 1.598115 10.656408 2620.008057 1013 | TTGTC 96.281916 3.775722 2.619934 1.299063 10.656408 3684.218947 1014 | TTGTG 97.143134 3.775722 3.734051 2.210370 10.656408 2540.744636 1015 | TTGTT 99.718382 3.775722 2.760729 1.405174 10.656408 6269.598766 1016 | TTTAA 84.437742 2.458906 1.423343 0.602201 7.951444 4736.695450 1017 | TTTAC 82.793640 2.458906 1.618360 0.730112 7.951444 3138.184929 1018 | TTTAG 83.947046 2.458906 1.728099 0.805620 7.951444 2191.749243 1019 | TTTAT 84.611983 2.458906 1.384147 0.577498 7.951444 6407.227642 1020 | TTTCA 78.129826 2.072592 1.239803 0.497795 7.690558 5498.677020 1021 | TTTCC 75.285607 2.072592 1.297503 0.532947 7.690558 4625.176795 1022 | TTTCG 79.067070 2.072592 1.382727 0.586308 7.690558 3173.546040 1023 | TTTCT 79.594244 2.072592 1.286599 0.526243 7.690558 4743.357081 1024 | TTTGA 89.941598 2.649410 2.179371 
0.900835 12.755660 6949.737643 1025 | TTTGC 83.798312 2.649410 1.697293 0.619133 12.755660 4109.930791 1026 | TTTGG 86.605735 2.649410 2.050856 0.822340 12.755660 4868.310236 1027 | TTTGT 85.149531 2.649410 1.697217 0.619091 12.755660 5287.647733 1028 | TTTTA 79.280022 1.969599 1.220980 0.499749 7.288230 4707.108280 1029 | TTTTC 77.911416 1.969599 1.171952 0.469952 7.288230 6591.982104 1030 | TTTTG 81.033026 1.969599 1.368173 0.592790 7.288230 5640.076002 1031 | TTTTT 80.784332 1.969599 1.174629 0.471563 7.288230 7681.882510 1032 | -------------------------------------------------------------------------------- /MetaCompore/resources/mines/AGACT_random_forest_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-slide/MetaCompore/bdedefdab566fabf771e8dbb66c1717b9ee903da/MetaCompore/resources/mines/AGACT_random_forest_model.pickle -------------------------------------------------------------------------------- /MetaCompore/resources/mines/GGACA_random_forest_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-slide/MetaCompore/bdedefdab566fabf771e8dbb66c1717b9ee903da/MetaCompore/resources/mines/GGACA_random_forest_model.pickle -------------------------------------------------------------------------------- /MetaCompore/resources/mines/GGACC_random_forest_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-slide/MetaCompore/bdedefdab566fabf771e8dbb66c1717b9ee903da/MetaCompore/resources/mines/GGACC_random_forest_model.pickle -------------------------------------------------------------------------------- /MetaCompore/resources/mines/GGACT_random_forest_model.pickle: -------------------------------------------------------------------------------- 
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name="alignment"

# Build the minimap2 index (.mmi) of the transcriptome reference fasta.
rule_name="minimap2_index"
rule minimap2_index:
    input: fasta=rules.get_transcriptome.output.fasta
    output: idx=join("results", module_name, rule_name, "transcriptome_reference.mmi")
    log: join("logs",module_name, rule_name, "name.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/minimap2:2.17"
    shell: "minimap2 -t {threads} {params.opt} -d {output.idx} {input.fasta} &> {log}"

# Align the merged fastq of one condition/replicate, then sort and index the BAM.
# stderr of every stage of the pipe is appended to the same log file.
rule_name="minimap2_align"
rule minimap2_align:
    input:
        idx=rules.minimap2_index.output.idx,
        fastq=rules.merge_fastq.output.fastq
    output:
        bam=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam")),
        bam_index=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam.bai"))
    log: join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/minimap2:2.17"
    shell: "minimap2 -t {threads} {params.opt} {input.idx} {input.fastq} 2> {log} | \
            samtools view -bh 2>> {log} | \
            samtools sort > {output.bam} 2>> {log} \
            && samtools index {output.bam} 2>> {log}"

# First-pass read filtering of each alignment with pyBioTools (options come from the config).
rule_name="alignmemt_prefilter"
rule alignmemt_prefilter:
    input:
        bam=rules.minimap2_align.output.bam,
        bam_index=rules.minimap2_align.output.bam_index
    output:
        bam=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam")),
        bam_index=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam.bai")),
        reads_index=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam.idx.gz"))
    log: join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/pybiotools:0.2.7"
    shell: "pyBioTools Alignment Filter {params.opt} -i {input.bam} -o {output.bam} --verbose &> {log}"

# Collect the read indexes of all condition/replicate pairs and write the list of valid references.
rule_name="min_ref_coverage"
rule min_ref_coverage:
    input:
        reads_index_list=expand(join("results", module_name, "alignmemt_prefilter", "{cond}_{rep}.bam.idx.gz"), cond=condition_list, rep=replicates_list)
    output:
        ref_list=join("results", module_name, rule_name, "valid_references_list.txt"),
    log: join("logs",module_name, rule_name, "out.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/metacompore_python:3.8.6"
    script: "../scripts/min_ref_coverage.py"

# Second-pass filtering: keep only reads mapped to the references selected by min_ref_coverage
# and write the ids of the retained reads.
rule_name="alignmemt_postfilter"
rule alignmemt_postfilter:
    input:
        bam=rules.alignmemt_prefilter.output.bam,
        bam_index=rules.alignmemt_prefilter.output.bam_index,
        ref_list=rules.min_ref_coverage.output.ref_list
    output:
        bam=join("results", module_name, rule_name, "{cond}_{rep}.bam"),
        bam_index=join("results", module_name, rule_name, "{cond}_{rep}.bam.bai"),
        reads_index=join("results", module_name, rule_name, "{cond}_{rep}.bam.idx.gz"),
        selected_reads_fn=join("results", module_name, rule_name, "{cond}_{rep}_selected_read_ids.txt")
    log: join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/pybiotools:0.2.7"
    shell: "pyBioTools Alignment Filter {params.opt} -i {input.bam} --select_ref_fn {input.ref_list} -o {output.bam} -l {output.selected_reads_fn} --verbose &> {log}"

# Merge all post-filtered replicate BAMs of one condition and index the result.
rule_name="alignmemt_merge"
rule alignmemt_merge:
    input:
        bam_list=expand(join("results", module_name, "alignmemt_postfilter", "{{cond}}_{rep}.bam"), rep=replicates_list),
        bam_index_list=expand(join("results", module_name, "alignmemt_postfilter", "{{cond}}_{rep}.bam.bai"), rep=replicates_list)
    output:
        bam=join("results", module_name, rule_name, "{cond}.bam"),
        bam_index=join("results", module_name, rule_name, "{cond}.bam.bai"),
    log: join("logs",module_name, rule_name, "{cond}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/minimap2:2.17"
    # The index step appends (>>) to the log; truncating it again would discard
    # whatever `samtools merge` reported.
    shell: "samtools merge {params.opt} -@ {threads} {output.bam} {input.bam_list} &> {log} && samtools index {output.bam} >> {log} 2>&1"
##### Rules #####
module_name="basecalling"

# Pick the guppy image matching the `gpu_acceleration` switch of the config.
guppy_container=(
    "library://aleg/default/ont_guppy:gpu-4.2.2"
    if config["gpu_acceleration"]
    else "library://aleg/default/ont_guppy:cpu-4.2.2"
)

# Basecall the fast5 directory of one condition/replicate with guppy.
rule_name="ont_guppy"
rule ont_guppy:
    input:
        fast5_dir=get_fast5
    output:
        seqsum=join("results", module_name, rule_name, "{cond}_{rep}","sequencing_summary.txt"),
        fastq_dir=directory(join("results", module_name, rule_name, "{cond}_{rep}"))
    log:
        join("logs", module_name, rule_name, "{cond}_{rep}.log")
    threads:
        get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name)
    resources:
        mem_mb=get_mem(config, rule_name)
    container:
        guppy_container
    shell:
        "guppy_basecaller {params.opt} -i {input.fast5_dir} -s {output.fastq_dir} &> {log}"

# Filter the basecalled reads of the guppy output directory into a single fastq file.
rule_name="merge_fastq"
rule merge_fastq:
    input:
        fastq_dir=rules.ont_guppy.output.fastq_dir
    output:
        fastq=join("results", module_name, rule_name, "{cond}_{rep}.fastq")
    log:
        join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads:
        get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name)
    resources:
        mem_mb=get_mem(config, rule_name)
    container:
        "library://aleg/default/pybiotools:0.2.7"
    shell:
        "pyBioTools Fastq Filter {params.opt} -i {input.fastq_dir} -o {output.fastq} --verbose &> {log}"
def config_load_validate(configfile, schema):
    """Load the workflow configuration file and validate it against `schema`.

    Returns the loaded configuration object; `validate` raises if it does not
    conform to the schema.
    """
    config = load_configfile(configfile)
    validate(config, schema=schema)
    logger.debug(config)
    return config

def samples_load_validate(samplefile, schema):
    """Load the tab-separated sample sheet, validate it and index it by `sample_id`.

    Lines starting with `#` and blank lines are ignored. On top of the schema
    validation, two consistency checks are enforced:
    * the `condition` column contains exactly `control` and `test`
    * both conditions list the same `replicate` values

    Returns the validated DataFrame. Logs an error and exits with a non-zero
    status (SystemExit) if one of the consistency checks fails.
    """
    samples_df = pd.read_csv(samplefile, comment="#", skip_blank_lines=True, sep="\t")
    validate(samples_df, schema=schema)
    samples_df.set_index("sample_id", drop=True, inplace=True)

    # Check that the conditions are exactly `control` and `test`
    conditions = sorted(samples_df["condition"].unique())
    if conditions != ["control", "test"]:
        logger.error("Metacompore requires exactly 2 conditions: `control` and `test`")
        # Exit code 1: a bare sys.exit() would report success to the caller
        sys.exit(1)

    # Check that the conditions have the same replicates
    rep1 = sorted(samples_df["replicate"][samples_df["condition"]=="control"])
    rep2 = sorted(samples_df["replicate"][samples_df["condition"]=="test"])
    if rep1 != rep2:
        logger.error("The `control` and `test` conditions must have the same replicates")
        sys.exit(1)

    logger.debug(samples_df)
    return samples_df

##### Getter functions #####

def get_fast5 (wildcards):
    """Return the `fast5_dir` of the sample matching `wildcards.cond` / `wildcards.rep`.

    Reads the module-level `samples_df` (presumably the frame returned by
    `samples_load_validate` - confirm against the Snakefile).
    Raises IndexError if no sample matches the wildcards.
    """
    res = samples_df[(samples_df["condition"]==wildcards.cond)&(samples_df["replicate"]==int(wildcards.rep))]
    if res.empty:
        raise IndexError("No sample found for condition `{}` and replicate `{}`".format(wildcards.cond, wildcards.rep))
    # The frame is indexed by sample_id, so the first match has to be taken by
    # position (`iloc`); `[0]` would be interpreted as an index label.
    return res["fast5_dir"].iloc[0]

def _get_rule_value (config, rule_name, key, default):
    """Return config[rule_name][key]; log an error and return `default` if it cannot be found."""
    try:
        return config[rule_name][key]
    # TypeError covers a rule section that is present but empty (None)
    except (KeyError, TypeError):
        logger.error("Could not find value `{}` for rule `{}` in config file".format(key, rule_name))
        return default

def get_threads (config, rule_name, default=1):
    """Number of threads configured for `rule_name`, or `default` if not set."""
    return _get_rule_value(config, rule_name, "threads", default)

def get_opt (config, rule_name, default=""):
    """Extra options (`opt`) configured for `rule_name`, or `default` if not set."""
    return _get_rule_value(config, rule_name, "opt", default)

def get_mem (config, rule_name, default=1000):
    """Memory in MB (`mem_mb`) configured for `rule_name`, or `default` if not set."""
    return _get_rule_value(config, rule_name, "mem_mb", default)

##### Helper functions #####

def mkdir (fn, exist_ok=False):
    """ Create directory recursively. Raise IOError if the path exists (and `exist_ok` is False) or if the creation fails """
    try:
        os.makedirs (fn, exist_ok=exist_ok)
    except OSError as E:
        # IOError is an alias of OSError; the original cause is kept via chaining
        raise IOError ("Error creating output folder `{}`".format(fn)) from E

def access_file (fn, **kwargs):
    """ Check if `fn` is an existing regular file that is readable. Extra keyword arguments are accepted and ignored """
    return os.path.isfile (fn) and os.access (fn, os.R_OK)
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name = "eligos2"

# Convert the transcriptome fasta into a BED file with `faidx --transform bed`.
rule_name="eligos2_fasta_to_bed"
rule eligos2_fasta_to_bed:
    input:
        fasta = rules.get_transcriptome.output.fasta
    output:
        bed = temp(join("results", module_name, rule_name, "transcriptome_reference.bed"))
    log: join("logs",module_name, rule_name, "out.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/metacompore_python:3.8.6"
    # stdout is the BED file itself, so only stderr is routed to the declared log
    shell: "faidx --transform bed {input.fasta} > {output.bed} 2> {log}"

# Run the eligos2 test-vs-control comparison on the merged alignments of both
# conditions; the command itself is built by the wrapper script.
rule_name="eligos2_pair_diff_mod"
rule eligos2_pair_diff_mod:
    input:
        control_bam=join("results", "alignment", "alignmemt_merge", "control.bam"),
        test_bam=join("results", "alignment", "alignmemt_merge", "test.bam"),
        fasta=rules.get_transcriptome.output.fasta,
        bed=rules.eligos2_fasta_to_bed.output.bed
    output:
        res_tsv=join("results", module_name, rule_name, "test_vs_control_on_transcriptome_reference_combine.txt")
    log: join("logs",module_name, rule_name, "out.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/eligos2:2.0.0"
    script: "../scripts/eligos2_pair_diff_mod.py"
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name="epinano"

# Scattergather config
scattergather:
    epinano_bam_split=10


# Guppy image used for the EpiNano branch, selected by the `gpu_acceleration` switch.
guppy_container=(
    "library://aleg/default/ont_guppy:gpu-3.1.5"
    if config["gpu_acceleration"]
    else "library://aleg/default/ont_guppy:cpu-3.1.5"
)

# Basecall the fast5 directory of one condition/replicate for the EpiNano branch.
rule_name="ont_guppy_epinano"
rule ont_guppy_epinano:
    input:
        fast5_dir=get_fast5
    output:
        seqsum=join("results", module_name, rule_name, "{cond}_{rep}","sequencing_summary.txt"),
        fastq_dir=directory(join("results", module_name, rule_name, "{cond}_{rep}"))
    log:
        join("logs", module_name, rule_name, "{cond}_{rep}.log")
    threads:
        get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name)
    resources:
        mem_mb=get_mem(config, rule_name)
    container:
        guppy_container
    shell:
        "guppy_basecaller {params.opt} -i {input.fast5_dir} -s {output.fastq_dir} &> {log}"

# Filter the basecalled reads of the guppy output directory into a single fastq file.
rule_name="merge_fastq_epinano"
rule merge_fastq_epinano:
    input:
        fastq_dir=rules.ont_guppy_epinano.output.fastq_dir
    output:
        fastq=join("results", module_name, rule_name, "{cond}_{rep}.fastq")
    log:
        join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads:
        get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name)
    resources:
        mem_mb=get_mem(config, rule_name)
    container:
        "library://aleg/default/pybiotools:0.2.7"
    shell:
        "pyBioTools Fastq Filter {params.opt} -i {input.fastq_dir} -o {output.fastq} --verbose &> {log}"

# Align the EpiNano fastq against the shared minimap2 index, then sort and index the BAM.
rule_name="minimap2_align_epinano"
rule minimap2_align_epinano:
    input:
        idx=rules.minimap2_index.output.idx,
        fastq=rules.merge_fastq_epinano.output.fastq
    output:
        bam=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam")),
        bam_index=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam.bai"))
    log:
        join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads:
        get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name)
    resources:
        mem_mb=get_mem(config, rule_name)
    container:
        "library://aleg/default/minimap2:2.17"
    shell: "minimap2 -t {threads} {params.opt} {input.idx} {input.fastq} 2> {log} | \
            samtools view -bh 2>> {log} | \
            samtools sort > {output.bam} 2>> {log} \
            && samtools index {output.bam} 2>> {log}"

# First-pass read filtering of each EpiNano alignment with pyBioTools.
rule_name="alignmemt_prefilter_epinano"
rule alignmemt_prefilter_epinano:
    input:
        bam=rules.minimap2_align_epinano.output.bam,
        bam_index=rules.minimap2_align_epinano.output.bam_index
    output:
        bam=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam")),
        bam_index=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam.bai")),
        reads_index=temp(join("results", module_name, rule_name, "{cond}_{rep}.bam.idx.gz"))
    log:
        join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads:
        get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name)
    resources:
        mem_mb=get_mem(config, rule_name)
    container:
        "library://aleg/default/pybiotools:0.2.7"
    shell:
        "pyBioTools Alignment Filter {params.opt} -i {input.bam} -o {output.bam} --verbose &> {log}"

# Collect the read indexes of all condition/replicate pairs and write the list of valid references.
rule_name="min_ref_coverage_epinano"
rule min_ref_coverage_epinano:
    input:
        reads_index_list=expand(join("results", module_name, "alignmemt_prefilter_epinano", "{cond}_{rep}.bam.idx.gz"), cond=condition_list, rep=replicates_list)
    output:
        ref_list=join("results", module_name, rule_name, "valid_references_list.txt"),
    log: join("logs",module_name, rule_name, "out.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/metacompore_python:3.8.6"
    script: "../scripts/min_ref_coverage.py"

# Second-pass filtering: keep only reads mapped to the selected references and
# write the ids of the retained reads.
rule_name="alignmemt_postfilter_epinano"
rule alignmemt_postfilter_epinano:
    input:
        bam=rules.alignmemt_prefilter_epinano.output.bam,
        bam_index=rules.alignmemt_prefilter_epinano.output.bam_index,
        ref_list=rules.min_ref_coverage_epinano.output.ref_list
    output:
        bam=join("results", module_name, rule_name, "{cond}_{rep}.bam"),
        bam_index=join("results", module_name, rule_name, "{cond}_{rep}.bam.bai"),
        reads_index=join("results", module_name, rule_name, "{cond}_{rep}.bam.idx.gz"),
        selected_reads_fn=join("results", module_name, rule_name, "{cond}_{rep}_selected_read_ids.txt")
    log: join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/pybiotools:0.2.7"
    shell: "pyBioTools Alignment Filter {params.opt} -i {input.bam} --select_ref_fn {input.ref_list} -o {output.bam} -l {output.selected_reads_fn} --verbose &> {log}"

# Merge all post-filtered replicate BAMs of one condition and index the result.
rule_name="alignmemt_merge_epinano"
rule alignmemt_merge_epinano:
    input:
        bam_list=expand(join("results", module_name, "alignmemt_postfilter_epinano", "{{cond}}_{rep}.bam"), rep=replicates_list),
        bam_index_list=expand(join("results", module_name, "alignmemt_postfilter_epinano", "{{cond}}_{rep}.bam.bai"), rep=replicates_list)
    output:
        bam=join("results", module_name, rule_name, "{cond}.bam"),
        bam_index=join("results", module_name, rule_name, "{cond}.bam.bai"),
    log: join("logs",module_name, rule_name, "{cond}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/minimap2:2.17"
    # The index step appends (>>) to the log; truncating it again would discard
    # whatever `samtools merge` reported.
    shell: "samtools merge {params.opt} -@ {threads} {output.bam} {input.bam_list} &> {log} && samtools index {output.bam} >> {log} 2>&1"


# Split the merged BAM of one condition into one file per scatter item.
rule_name="epinano_splitbam"
rule epinano_splitbam:
    input:
        bam=rules.alignmemt_merge_epinano.output.bam
    output:
        temp(scatter.epinano_bam_split(join("results", module_name, rule_name, "{{cond}}_split/{{cond}}.{scatteritem}.bam")))
    container: "library://aleg/default/pybiotools:0.2.7"
    # NOTE: `-n 10` has to stay equal to the `epinano_bam_split` value of the
    # `scattergather` directive of this file, which defines how many outputs are expected.
    shell: "pyBioTools Alignment Split --index -i {input.bam} -n 10 --output_fn_list {output}"

# Run Epinano_Variants on one BAM split, then compute the 5-mer sliding window table.
rule_name="epinano_variants"
rule epinano_variants:
    input:
        bam=join("results", module_name, "epinano_splitbam", "{cond}_split/{cond}.{scatteritem}.bam"),
        fasta=rules.get_transcriptome.output.fasta,
        picard_dict=rules.generate_transcriptome_picard_index.output.picard_dict
    output:
        variants=temp(join("results", module_name, rule_name, "{cond}.{scatteritem}.plus_strand.per_site.5mer.csv"))
    log: join("logs",module_name, rule_name, "{cond}.{scatteritem}.log"),
    threads: get_threads(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/epinano:1.2.0"
    shell:
        """
        Epinano_Variants -R {input.fasta} -b {input.bam} -n {threads} -T t -s /usr/EpiNano/misc/sam2tsv.jar
        # This mv is needed because Epinano_Variants and Slide_Variants.py don't allow changing the output path
        mv $(dirname {input.bam})/{wildcards.cond}.{wildcards.scatteritem}.plus_strand.per.site.csv $(dirname {output.variants})/{wildcards.cond}.{wildcards.scatteritem}.plus_strand
        python /usr/EpiNano/misc/Slide_Variants.py $(dirname {output.variants})/{wildcards.cond}.{wildcards.scatteritem}.plus_strand 5
        """

# Filter the k-mers of each per-split variants table (see the filter script for the criteria).
rule_name="epinano_filter_rrach_variants"
rule epinano_filter_rrach_variants:
    input:
        variants=rules.epinano_variants.output.variants
    output:
        filteredvariants=temp(join("results", module_name, rule_name, "{cond}.{scatteritem}.plus_strand.per_site.5mer.csv.filtered"))
    log: join("logs",module_name, rule_name, "{cond}.{scatteritem}.log"),
    threads: get_threads(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/epinano:1.2.0"
    script: "../scripts/epinano_filter_kmers.py"

# Concatenate the filtered tables of all splits of one condition, keeping the very
# first line and dropping every later line starting with `#`.
rule_name="epinano_gather_variants"
rule epinano_gather_variants:
    input:
        gather.epinano_bam_split(join("results", module_name, "epinano_filter_rrach_variants", "{{cond}}.{scatteritem}.plus_strand.per_site.5mer.csv.filtered"))
    output:
        filteredvariants=join("results", module_name, rule_name, "{cond}.all.plus_strand.per_site.5mer.csv")
    log: join("logs",module_name, rule_name, "{cond}.log"),
    threads: get_threads(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/epinano:1.2.0"
    shell: "cat {input} | awk 'NR==1 || !/^#/' > {output}"

# Per-condition prediction with the pretrained q3/mis3/del3 linear model.
rule_name="epinano_predict"
rule epinano_predict:
    input:
        variants=rules.epinano_gather_variants.output.filteredvariants
    output:
        predictions=temp(join("results", module_name, rule_name, "{cond}.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"))
    log: join("logs",module_name, rule_name, "{cond}.log"),
    threads: get_threads(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/epinano:1.2.0"
    shell: "Epinano_Predict -o $(dirname {output.predictions})/{wildcards.cond} -M /usr/EpiNano/models/rrach.q3.mis3.del3.linear.dump -p {input.variants} -cl 8,13,23"

# Compute the test-vs-control delta features from the gathered tables of both conditions.
rule_name="epinano_delta_variants"
rule epinano_delta_variants:
    input:
        control_variants=join("results", module_name, "epinano_gather_variants", "control.all.plus_strand.per_site.5mer.csv"),
        test_variants=join("results", module_name, "epinano_gather_variants", "test.all.plus_strand.per_site.5mer.csv")
    output:
        delta=join("results", module_name, rule_name, "test_control_delta.5mer.csv")
    log: join("logs",module_name, rule_name, "delta_variants.log"),
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/epinano:1.2.0"
    shell: "python /usr/EpiNano/misc/Epinano_make_delta.py {input.test_variants} {input.control_variants} {params.opt[min_cov]} 5 > {output.delta}"

# Prediction on the delta features with the pretrained delta linear model.
rule_name="epinano_delta_predict"
rule epinano_delta_predict:
    input:
        delta_variants=rules.epinano_delta_variants.output.delta
    output:
        predictions=temp(join("results", module_name, rule_name, "test_control.DeltaMis3.DeltaDel3.DeltaQ3.MODEL.rrach.deltaQ3.deltaMis3.deltaDel3.linear.dump.csv"))
    log: join("logs",module_name, rule_name, "delta_variants.log"),
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/epinano:1.2.0"
    shell: "Epinano_Predict -o $(dirname {output.predictions})/test_control -M /usr/EpiNano/models/rrach.deltaQ3.deltaMis3.deltaDel3.linear.dump -p {input.delta_variants} -cl 7,12,22"



# Final EpiNano step: requires the three prediction tables and currently only
# creates (touch) the final result file.
rule_name="epinano_postprocess"
rule epinano_postprocess:
    input:
        join("results", module_name, "epinano_predict", "test.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"),
        join("results", module_name, "epinano_predict", "control.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"),
        join("results", module_name, "epinano_delta_predict", "test_control.DeltaMis3.DeltaDel3.DeltaQ3.MODEL.rrach.deltaQ3.deltaMis3.deltaDel3.linear.dump.csv"),
    output:
        res_tsv=join("results", "final", "{}_results.tsv".format(module_name))
    wildcard_constraints:
        cond="[^.]"
    log: join("logs",module_name, rule_name, "out.log"),
    threads: get_threads(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/epinano:1.2.0"
    shell: "touch {output.res_tsv}"


#python ../../Epinano_Variants.py -R ref.fa -b wt.bam -n 6 -T t -s ../../misc/sam2tsv.jar
#python ../../Epinano_Variants.py -R ref.fa -b ko.bam -n 6 -T t -s ../../misc/sam2tsv.jar
#python ../../Epinano_Predict.py -o SVM_Predict -M /usr/EpiNano/models/rrach.q3.mis3.del3.linear.dump -p wt.plus_strand.per_site.5mer.csv -cl 8,13,23
#
#
#
#echo "generate delta-features"
#python ../../misc/Epinano_make_delta.py wt.plus_strand.per_site.5mer.csv ko.plus_strand.per_site.5mer.csv 5 > wt_ko_delta.5mer.csv
#echo "predict using pretrained SVM models with delta features"
#python ../../Epinano_Predict.py -o SVM_Predict_delta_features -M /usr/EpiNano//models/rrach.deltaQ3.deltaMis3.deltaDel3.linear.dump -p wt_ko_delta.5mer.csv -cl 7,12,22

#python $EPINANO_HOME/Epinano_Predict.py
# --model q3.mis3.del3.MODEL.linear.model.dump
# --predict some_sample.per_site.5mer.csv
# --columns 8,13,23
# --out_prefix some_sample.modification
#
#
#python $EPINANO_HOME/Epinano_Variants.py -n 6 -R reference.fasta -b sample.reads.bam -s /path/to/sam2tsv/sam2tsv.jar --type t
#
#
--------------------------------------------------------------------------------
/MetaCompore/workflow/rules/input.smk:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####

from os.path import join
from snakemake.remote.FTP import RemoteProvider as FTP
from snakemake.remote.HTTP import RemoteProvider as HTTP

##### Define RemoteProvider if needed #####

# The reference may be a local path or an FTP/HTTP URL; URLs are wrapped in
# the matching Snakemake remote provider so they can be used as rule input.
transcriptome_ref = config["transcriptome_ref"]

if transcriptome_ref.startswith("ftp"):
    transcriptome_ref = FTP().remote(transcriptome_ref)

elif transcriptome_ref.startswith("http"):
    transcriptome_ref = HTTP().remote(transcriptome_ref)


##### Rules #####

module_name = "input"

# Produce the working copy of the transcriptome (FASTA + .fai index) through
# scripts/get_transcriptome.py.
rule_name="get_transcriptome"
rule get_transcriptome:
    input: fasta=transcriptome_ref
    output:
        fasta = join("results", module_name, rule_name, "transcriptome_reference.fa"),
        fai = join("results", module_name, rule_name, "transcriptome_reference.fa.fai")
    log: join("logs",module_name, rule_name, "transcriptome_reference.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/metacompore_python:3.8.6"
    script: f"../scripts/get_transcriptome.py"

# Build a Picard sequence dictionary. A trailing single quote on a line is
# temporarily replaced by the token "primesymbol" before Picard runs and is
# restored in the resulting dictionary afterwards.
# NOTE(review): this rule reads the *input* of get_transcriptome (the raw
# reference) and writes "<input>.sanitized" next to it - confirm this is
# intended for remote or read-only references.
rule_name="generate_transcriptome_picard_index"
rule generate_transcriptome_picard_index:
    input: fasta=rules.get_transcriptome.input.fasta
    output:
        picard_dict = join("results", module_name, "get_transcriptome", "transcriptome_reference.fa.dict"),
    log: join("logs",module_name, rule_name, "transcriptome_reference_picard.log")
    threads: 1
    resources: mem_mb=get_mem(config, rule_name)
    container: "docker://broadinstitute/picard"
    shell:
        """
        perl -pe "s/'$/primesymbol/" {input.fasta} > {input.fasta}.sanitized
        java -jar /usr/picard/picard.jar CreateSequenceDictionary R={input.fasta}.sanitized O={output.picard_dict}.sanitized
        perl -pe "s/primesymbol/'/" {output.picard_dict}.sanitized > {output.picard_dict}
        """

--------------------------------------------------------------------------------
/MetaCompore/workflow/rules/mines.smk:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name="mines"

# Tombo de novo modification detection on all test replicates; Tombo derives
# the statistics file name from the basename given in params.bn.
rule_name="tombo_de_novo"
rule tombo_de_novo:
    input:
        fast5=expand(join("results", "tombo", "tombo_preprocess", "test_{rep}"), rep=replicates_list),
    output:
        stats_h5=join("results", module_name, rule_name, "test.tombo.stats")
    log: join("logs",module_name, rule_name, "test.log")
    threads: get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name),
        bn=join("results", module_name, rule_name, "test")
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/tombo:1.5.1"
    shell: "tombo detect_modifications de_novo {params.opt} --processes {threads} --fast5-basedirs {input.fast5} --statistics-file-basename {params.bn} &> {log}"

# Export coverage and fraction-modified browser files from the de novo
# statistics. Only the plus-strand coverage is kept; the other files are
# declared temp().
rule_name="tombo_de_novo_text_output"
rule tombo_de_novo_text_output:
    input:
        fast5=expand(join("results", "tombo", "tombo_preprocess", "test_{rep}"), rep=replicates_list),
        stats_h5=rules.tombo_de_novo.output.stats_h5
    output:
        coverage=join("results", module_name, rule_name, "test.coverage.plus.bedgraph"),
        fraction=temp(join("results", module_name, rule_name, "test.fraction_modified_reads.plus.wig")),
        coverage_minus=temp(join("results", module_name, rule_name, "test.coverage.minus.bedgraph")),
        fraction_minus=temp(join("results", module_name, rule_name, "test.fraction_modified_reads.minus.wig"))
    log: join("logs",module_name, rule_name, "test.log"),
    threads: get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name),
        bn=join("results", module_name, rule_name, "test")
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/tombo:1.5.1"
    shell: "tombo text_output browser_files --fast5-basedirs {input.fast5} --statistics-filename {input.stats_h5} --browser-file-basename {params.bn} --file-types coverage fraction &> {log}"

# Convert the plus-strand fraction-modified wig file to BED with wig2bed.
rule_name="mines_wig2bed"
rule mines_wig2bed:
    input:
        fraction=rules.tombo_de_novo_text_output.output.fraction
    output:
        fraction=join("results", module_name, rule_name, "test.fraction_modified_reads.plus.bed")
    log: join("logs",module_name, rule_name, "out.log"),
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/mines:latest"
    shell: "wig2bed < {input.fraction} > {output.fraction} 2> {log}"

# Run cDNA_MINES on the coverage and fraction-modified tracks, using the
# k-mer model list shipped under resources/mines.
rule_name="mines_cdna"
rule mines_cdna:
    input:
        coverage=rules.tombo_de_novo_text_output.output.coverage,
        fraction=rules.mines_wig2bed.output.fraction,
        fasta=rules.get_transcriptome.output.fasta,
        kmer_models="resources/mines/names.txt"
    output:
        res_bed=join("results", module_name, rule_name, "mines_results.bed")
    log: join("logs",module_name, rule_name, "out.log"),
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/mines:latest"
    shell: "cDNA_MINES --kmer_models {input.kmer_models} --fraction_modified {input.fraction} --coverage {input.coverage} --ref {input.fasta} --output {output.res_bed} &> {log}"

# Turn the MINES BED file into the final TSV (scripts/mines_postprocess.py).
rule_name="mines_postprocess"
rule mines_postprocess:
    input:
        res_bed=rules.mines_cdna.output.res_bed
    output:
        res_tsv=join("results", "final", "{}_results.tsv".format(module_name))
    log: join("logs",module_name, rule_name, "out.log"),
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/metacompore_python:3.8.6"
    script: "../scripts/mines_postprocess.py"

--------------------------------------------------------------------------------
/MetaCompore/workflow/rules/nanocompore.smk:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name="nanocompore"

# Collapse the f5c eventalign table of one sample ({cond}_{rep}) with
# nanocompore; the output directory and the two files inside it are declared.
rule_name="nanocompore_eventalign_collapse"
rule nanocompore_eventalign_collapse:
    input:
        tsv=rules.f5c_eventalign.output.tsv
    output:
        outdir=directory(join("results", module_name, rule_name, "{cond}_{rep}")),
        tsv=join("results", module_name, rule_name, "{cond}_{rep}", "out_eventalign_collapse.tsv"),
        idx=join("results", module_name, rule_name, "{cond}_{rep}", "out_eventalign_collapse.tsv.idx")
    log: join("logs", module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/nanocompore:1.0.3"
    shell: "nanocompore eventalign_collapse -t {threads} {params.opt} --overwrite -i {input.tsv} -o {output.outdir} &> {log}"

# Compare control and test replicates (scripts/nanocompore_sampcomp.py).
rule_name="nanocompore_sampcomp"
rule nanocompore_sampcomp:
    input:
        control_tsv=expand(join("results", module_name, "nanocompore_eventalign_collapse", "control_{rep}", "out_eventalign_collapse.tsv"), rep=replicates_list),
        test_tsv=expand(join("results", module_name, "nanocompore_eventalign_collapse", "test_{rep}", "out_eventalign_collapse.tsv"), rep=replicates_list),
        fasta=rules.get_transcriptome.output.fasta
    output:
        res_tsv=join("results", module_name, rule_name, "outnanocompore_results.tsv"),
        shift_tsv=join("results", module_name, rule_name, "outnanocompore_shift_stats.tsv"),
        res_db=join("results", module_name, rule_name, "outSampComp.db"),
    log: join("logs",module_name, rule_name, "sampcomp.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/nanocompore:1.0.3"
    script: f"../scripts/nanocompore_sampcomp.py"

# Write one final TSV per statistical test / context combination
# (scripts/nanocompore_postprocess.py).
rule_name="nanocompore_postprocess"
rule nanocompore_postprocess:
    input:
        res_tsv=rules.nanocompore_sampcomp.output.res_tsv,
        fasta=rules.get_transcriptome.output.fasta
    output:
        join("results", "final", "nanocompore_results_GMM_context_0.tsv"),
        join("results", "final", "nanocompore_results_GMM_context_2.tsv"),
        join("results", "final", "nanocompore_results_KS_dwell_context_0.tsv"),
        join("results", "final", "nanocompore_results_KS_dwell_context_2.tsv"),
        join("results", "final", "nanocompore_results_KS_intensity_context_0.tsv"),
        join("results", "final", "nanocompore_results_KS_intensity_context_2.tsv")
    log: join("logs", module_name, rule_name, "out.log"),
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/metacompore_python:3.8.6"
    script: "../scripts/nanocompore_postprocess.py"

--------------------------------------------------------------------------------
/MetaCompore/workflow/rules/quality_control.smk:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name = "quality_control"

# Per-sample pycoQC report (HTML + JSON) built from the guppy sequencing
# summary and the minimap2 alignment.
rule_name = "pycoQC"
rule pycoQC:
    input:
        seqsum = rules.ont_guppy.output.seqsum,
        bam = rules.minimap2_align.output.bam,
        bam_index = rules.minimap2_align.output.bam_index
    output:
        json = join("results", module_name, rule_name, "pycoQC_{cond}_{rep}.json"),
        html = join("results", module_name, rule_name, "pycoQC_{cond}_{rep}.html")
    log: join("logs", module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt = get_opt(config, rule_name)
    resources: mem_mb = get_mem(config, rule_name)
    container: "library://aleg/default/pycoqc:2.5.2"
    shell: "pycoQC {params.opt} -f {input.seqsum} -a {input.bam} -o {output.html} -j {output.json} &> {log}"

--------------------------------------------------------------------------------
/MetaCompore/workflow/rules/resquiggling.smk:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name="resquiggling"

# Select the GPU or CPU build of the f5c image from the config flag.
if config["gpu_acceleration"]:
    f5c_container="library://aleg/default/f5c:gpu-0.6"
else:
    f5c_container="library://aleg/default/f5c:cpu-0.6"

# Index the merged fastq against the fast5 directory; the four index files
# are declared next to the fastq (fastq path + suffix).
rule_name="f5c_index"
rule f5c_index:
    input:
        fast5_dir=get_fast5,
        fastq=rules.merge_fastq.output.fastq
    output:
        index=rules.merge_fastq.output.fastq+".index",
        index_fai=rules.merge_fastq.output.fastq+".index.fai",
        index_gzi=rules.merge_fastq.output.fastq+".index.gzi",
        index_readdb=rules.merge_fastq.output.fastq+".index.readdb"
    log: join("logs", module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: f5c_container
    shell: "f5c index {params.opt} -t {threads} -d {input.fast5_dir} {input.fastq} 2> {log}"

# Align raw signal events to the reference for one sample. The event table is
# written on stdout (redirected to {output.tsv}), the summary goes to
# {output.summary} and stderr to the log. The .index file is listed as input
# so that f5c_index runs first.
rule_name="f5c_eventalign"
rule f5c_eventalign:
    input:
        fastq=rules.merge_fastq.output.fastq,
        index=rules.f5c_index.output.index,
        bam=rules.alignmemt_postfilter.output.bam,
        fasta=rules.get_transcriptome.output.fasta,
        kmer_model="resources/f5c/r9.4_70bps.u_to_t_rna.5mer.template.model"
    output:
        tsv=join("results", module_name, rule_name, "{cond}_{rep}_data.tsv"),
        summary=join("results", module_name, rule_name, "{cond}_{rep}_summary.tsv")
    log: join("logs", module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: f5c_container
    shell: "f5c eventalign {params.opt} -t {threads} --kmer-model {input.kmer_model} -r {input.fastq} -b {input.bam} -g {input.fasta} --summary {output.summary} > {output.tsv} 2> {log}"

--------------------------------------------------------------------------------
/MetaCompore/workflow/rules/tombo.smk:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####

# Std lib
from os.path import join

##### Rules #####
module_name="tombo"

# Prepare a per-sample fast5 directory for Tombo
# (scripts/tombo_preprocess.py); the output is a directory.
rule_name="tombo_preprocess"
rule tombo_preprocess:
    input:
        fast5_dir=get_fast5,
        fastq=rules.merge_fastq.output.fastq,
        fasta=rules.get_transcriptome.output.fasta,
        selected_reads_fn=rules.alignmemt_postfilter.output.selected_reads_fn
    output:
        fast5_dir=directory(join("results", module_name, rule_name, "{cond}_{rep}"))
    log: join("logs",module_name, rule_name, "{cond}_{rep}.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/tombo:1.5.1"
    script: "../scripts/tombo_preprocess.py"

# rule_name is read by the rule that follows this assignment.
rule_name="tombo_level_sample_compare"
# Compare test against control replicates with Tombo level_sample_compare;
# Tombo derives the statistics file name from the basename in params.bn.
rule tombo_level_sample_compare:
    input:
        control_fast5=expand(join("results", module_name, "tombo_preprocess", "control_{rep}"), rep=replicates_list),
        test_fast5=expand(join("results", module_name, "tombo_preprocess", "test_{rep}"), rep=replicates_list)
    output:
        res_h5=join("results", module_name, rule_name, "results.tombo.stats")
    log: join("logs",module_name, rule_name, "out.log")
    threads: get_threads(config, rule_name)
    params:
        opt=get_opt(config, rule_name),
        bn=join("results", module_name, rule_name, "results")
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/tombo:1.5.1"
    shell: "tombo detect_modifications level_sample_compare {params.opt} --processes {threads} --fast5-basedirs {input.test_fast5} --alternate-fast5-basedirs {input.control_fast5} --statistics-file-basename {params.bn} &> {log}"

# Convert the Tombo statistics file into the final TSV
# (scripts/tombo_postprocess.py).
rule_name="tombo_postprocess"
rule tombo_postprocess:
    input:
        res_h5=rules.tombo_level_sample_compare.output.res_h5,
        fasta=rules.get_transcriptome.output.fasta
    output:
        res_tsv=join("results", "final", "{}_results.tsv".format(module_name))
    log: join("logs", module_name, rule_name, "out.log")
    threads: get_threads(config, rule_name)
    params: opt=get_opt(config, rule_name)
    resources: mem_mb=get_mem(config, rule_name)
    container: "library://aleg/default/metacompore_python:3.8.6"
    script: "../scripts/tombo_postprocess.py"

--------------------------------------------------------------------------------
/MetaCompore/workflow/schemas/config.schema.yaml:
--------------------------------------------------------------------------------
$schema: "http://json-schema.org/draft-06/schema#"

description: snakemake configuration file

type: object

# possible entries of the config file and which type they should be
# (all entries live under a single "properties" mapping: repeating the key
# makes YAML loaders keep only the last one, or reject the file)
properties:
  transcriptome_ref:
    type: string
    description: Path to an ENSEMBL FASTA reference transcriptome file/URL to be used for read mapping (local, FTP or HTTP)
  gpu_acceleration:
    type: boolean
    description: If true, use GPUs for basecalling (requires CUDA libs)
  nanocompore:
    type: boolean
    description: If True run nanocompore
  tombo:
    type: boolean
    description: If True run tombo
  differr:
    type: boolean
    description: If True run differr_nanopore_DRS
  eligos2:
    type: boolean
    description: If True run eligos2
  xpore:
    type: boolean
    description: If True run xpore
  mines:
    type: boolean
    description: If True run mines
  epinano:
    type: boolean
    description: If True run epinano

# entries that have to be in the config file for successful validation
required:
  - transcriptome_ref
  - gpu_acceleration
  - nanocompore
  - tombo
  - differr
  - eligos2
  - xpore
  - mines
  - epinano

--------------------------------------------------------------------------------
/MetaCompore/workflow/schemas/samples.schema.yaml:
--------------------------------------------------------------------------------
$schema: "http://json-schema.org/draft-06/schema#"

description: Entry in the sample sheet

# columns that the config/samples.tsv file can have and which type they should be
properties:
  sample_id:
    type: [string, integer]
    description: Sample name/identifier
  condition:
    type: string
    description: Sample condition that will be compared during differential analysis (e.g. WT, KO)
  replicate:
    type: [string, integer]
    description: Sample replicate id (e.g. 1,2,3 or A,B,C)
  fast5_dir:
    type: string
    description: Path to directory containing fast5 files

# columns that the config/samples.tsv file must have to pass schema validation
required:
  - sample_id
  - condition
  - replicate
  - fast5_dir

--------------------------------------------------------------------------------
/MetaCompore/workflow/scripts/differr_compare.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####
from snakemake.shell import shell
import os

##### RUN SCRIPT FUNCTION #####
# Values injected by Snakemake for the calling rule
control_bam_list = snakemake.input.control_bam
test_bam_list = snakemake.input.test_bam
fasta = snakemake.input.fasta
res_bed = snakemake.output.res_bed
log = snakemake.log[0]
threads = snakemake.threads
opt = snakemake.params.opt

# One "-a <bam>" option per control replicate
control_bam_opt = ""
for i in control_bam_list:
    control_bam_opt += f" -a {i}"

# One "-b <bam>" option per test replicate
test_bam_opt = ""
for i in test_bam_list:
    test_bam_opt += f" -b {i}"

# shell() fills the {placeholders} from the variables defined above
shell("differr -p {threads} {opt} {control_bam_opt} {test_bam_opt} -r {fasta} -o {res_bed} &> {log}")

--------------------------------------------------------------------------------
/MetaCompore/workflow/scripts/differr_postprocess.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####

import logging
import pandas as pd
import numpy as np
import datetime

##### DEFINE SCRIPT FUNCTION #####
def differr_postprocess (res_bed, res_tsv, log):
    """
    Convert a differr BED result file into a tab separated table.
    * res_bed: path to the headerless, tab separated differr output
    * res_tsv: path of the table to write, with columns refid, pos, pvalue,
      adj_pvalue, odds_ratio and G_stat
    * log: path of the log file (overwritten)
    """

    logging.basicConfig(filename=log, filemode="w", level=logging.INFO, format='%(message)s')
    logging.info("timestamp: {}".format(str(datetime.datetime.now())))
    for i, j in locals().items():
        logging.info("\t{}: {}\n".format(i,j))

    # Clipping bounds: smallest positive float and 1
    min_pval = np.nextafter(float(0), float(1))
    max_pval = 1

    logging.info("Loading data")
    # names labels all 14 columns; usecols keeps refid, pos, score,
    # odds_ratio, G_stat, -log10_pval and -log10_FDR
    df = pd.read_csv(res_bed, sep="\t", usecols=[0,1,4,6,7,8,9],
        names=[
            "refid",
            "pos",
            "end",
            "name",
            "score",
            "strand",
            "odds_ratio",
            "G_stat",
            "-log10_pval",
            "-log10_FDR",
            "G_stat_control",
            "-log10_pval_control",
            "G_stat_test",
            "-log10_pval_test"])

    logging.info("Cleanup data and calculate pvalue")
    # Treat +inf as missing, then drop rows without score or p-value
    df = df.replace(np.inf, np.nan)
    df = df.dropna(subset=["score", "-log10_pval"])
    # Back-transform from -log10 space and clip to [min_pval, max_pval]
    df["pvalue"]=np.power(10, -df["-log10_pval"]).clip(min_pval, max_pval)
    df["adj_pvalue"]=np.power(10, -df["-log10_FDR"]).clip(min_pval, max_pval)
    df=df[["refid", "pos", "pvalue", "adj_pvalue", "odds_ratio", "G_stat"]]

    logging.info("Write output file")
    df.to_csv(res_tsv, index=False, sep="\t")

##### RUN SCRIPT FUNCTION #####
differr_postprocess (
    res_bed=snakemake.input.res_bed,
    res_tsv=snakemake.output.res_tsv,
    log=snakemake.log[0])

--------------------------------------------------------------------------------
/MetaCompore/workflow/scripts/eligos2_pair_diff_mod.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

##### Imports #####
from snakemake.shell import shell
import os
from tempfile import TemporaryDirectory

##### RUN SCRIPT FUNCTION #####
# Values injected by Snakemake for the calling rule
control_bam = snakemake.input.control_bam
test_bam = snakemake.input.test_bam
fasta = snakemake.input.fasta
bed = snakemake.input.bed
res_tsv = snakemake.output.res_tsv
log = snakemake.log[0]
threads = snakemake.threads
opt = snakemake.params.opt
# eligos2 is given an output directory, not a file name
outdir = os.path.dirname(res_tsv)

# The sub-BAM files go to a temporary directory removed when the block exits
with TemporaryDirectory() as tempdir:
    shell("eligos2 pair_diff_mod -t {threads} {opt} -cbam {control_bam} -tbam {test_bam} -ref {fasta} -reg {bed} --sub_bam_dir {tempdir} -o {outdir} &> {log}")

-------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/eligos2_postprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | 5 | import logging 6 | import pandas as pd 7 | import numpy as np 8 | import datetime 9 | 10 | ##### DEFINE SCRIPT FUNCTION ##### 11 | def eligos2_postprocess ( 12 | input_tsv, 13 | output_tsv, 14 | log, 15 | min_oddR, 16 | min_esb, 17 | min_cov, 18 | max_adj_pval, 19 | discard_homopolymers, 20 | ref_base): 21 | 22 | logging.basicConfig(filename=log, filemode="w", level=logging.INFO, format='%(message)s') 23 | logging.info("timestamp: {}".format(str(datetime.datetime.now()))) 24 | for i, j in locals().items(): 25 | logging.info("\t{}: {}\n".format(i,j)) 26 | 27 | min_pval = np.nextafter(float(0), float(1)) 28 | max_pval = 1 29 | 30 | logging.info("Loading data") 31 | df = pd.read_csv(input_tsv, sep='\t') 32 | df = df.dropna(subset=["adjPval"]) 33 | df["adjPval"] = df["adjPval"].clip(min_pval, max_pval) 34 | 35 | logging.info("Cleanup data and calculate pvalue") 36 | if min_oddR is not None: 37 | df = df[df["oddR"] >= min_oddR] 38 | if min_esb is not None: 39 | df = df[df["ESB_test"] >= min_esb] 40 | if min_cov is not None: 41 | df = df[df["total_reads"] >= min_cov] 42 | if max_adj_pval is not None: 43 | df = df[df["adjPval"] <= max_adj_pval] 44 | if discard_homopolymers is True: 45 | df = df[df["homo_seq"] == "--"] 46 | if ref_base in ["A","T","C","G"]: 47 | df = df[df["ref"] == ref_base] 48 | 49 | df = df[['chrom', 'start_loc', 'pval', 'adjPval', 'oddR']] 50 | df = df.rename(columns={'chrom':"refid", 'start_loc':"pos", 'pval':"pvalue", 'adjPval':"adj_pvalue", 'oddR':"odds_ratio"}) 51 | 52 | logging.info("Write output file") 53 | df.to_csv(output_tsv, index=False, sep="\t") 54 | 55 | ##### RUN SCRIPT FUNCTION ##### 56 | eligos2_postprocess ( 57 | input_tsv=snakemake.input.res_tsv, 58 | 
output_tsv=snakemake.output.res_tsv, 59 | log=snakemake.log[0], 60 | min_oddR=snakemake.params.opt.get("min_oddR", 1.2), 61 | min_esb=snakemake.params.opt.get("min_esb", 0), 62 | min_cov=snakemake.params.opt.get("min_cov", 30), 63 | max_adj_pval=snakemake.params.opt.get("max_adj_pval", 0.01), 64 | discard_homopolymers=snakemake.params.opt.get("discard_homopolymers", True), 65 | ref_base=snakemake.params.opt.get("ref_base", "A")) 66 | -------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/epinano_filter_kmers.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | import sys 4 | 5 | r = ["A", "G"] 6 | h = ["A", "C", "T", "U"] 7 | 8 | input = snakemake.input.variants 9 | output= snakemake.output.filteredvariants 10 | 11 | with open(output,'w') as outfile: 12 | with open(input, 'r') as f: 13 | for line in f: 14 | line=line 15 | if line[0]=="#": 16 | outfile.write(line) 17 | else: 18 | fields=line.split(',') 19 | kmer=fields[0].upper() 20 | if kmer[0] in r and kmer[1] in r and kmer[2] == "A" and kmer[3]=="C" and kmer[4] in h: 21 | outfile.write(line) 22 | 23 | -------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/get_transcriptome.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | 5 | from collections import OrderedDict 6 | import logging 7 | from pyBioTools import Fasta 8 | from pyfaidx import Faidx 9 | import datetime 10 | 11 | ##### DEFINE SCRIPT FUNCTION ##### 12 | 13 | def get_transcriptome(fa_input, fa_output, fai_output, log): 14 | 15 | logging.basicConfig(filename=log, filemode="w", level=logging.INFO, format='%(message)s') 16 | logging.info("timestamp: {}".format(str(datetime.datetime.now()))) 17 | for i, j in locals().items(): 18 | logging.info("\t{}: {}\n".format(i,j)) 19 | 20 | try: 21 | # Parse 
fasta file uncompress and simplify transcript ids 22 | logging.info("Read input transcriptome fasta file") 23 | with open(fa_output, "w") as fa_out: 24 | for rec in Fasta.Reader(fa_input): 25 | fa_out.write(">{}\n{}\n".format(rec.short_name, rec.seq)) 26 | 27 | logging.info("Index fasta file") 28 | with Faidx(fa_output) as fa_out: 29 | fa_out.build_index() 30 | 31 | except: 32 | logging.exception('Error while running get_transcriptome') 33 | raise 34 | 35 | ##### RUN SCRIPT FUNCTION ##### 36 | 37 | get_transcriptome( 38 | fa_input = str(snakemake.input.fasta), # str required for FTP or HTTP sources 39 | fa_output = snakemake.output.fasta, 40 | fai_output = snakemake.output.fai, 41 | log = snakemake.log[0]) 42 | -------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/min_ref_coverage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | 5 | import os 6 | import gzip 7 | import logging 8 | from collections import Counter, defaultdict 9 | import datetime 10 | 11 | ##### DEFINE SCRIPT FUNCTION ##### 12 | 13 | def min_ref_coverage (reads_index_list, ref_list, min_cov, log): 14 | 15 | logging.basicConfig(filename=log, filemode="w", level=logging.INFO, format='%(message)s') 16 | logging.info("timestamp: {}".format(str(datetime.datetime.now()))) 17 | for i, j in locals().items(): 18 | logging.info("\t{}: {}\n".format(i,j)) 19 | 20 | try: 21 | logging.info("Parse index files") 22 | d = defaultdict(Counter) 23 | all_ref = set() 24 | for fn in reads_index_list: 25 | sample_id = os.path.basename(fn) 26 | with gzip.open(fn, "rt") as fp: 27 | header = next(fp) 28 | for l in fp: 29 | ref_id = l.split("\t")[1] 30 | d[sample_id][ref_id]+=1 31 | all_ref.add(ref_id) 32 | logging.info("Total references: {}".format(len(all_ref))) 33 | 34 | logging.info("Count valid references per sample") 35 | valid_ref_count = Counter() 36 | for sample_id, 
ref_d in d.items(): 37 | for ref_id, i in ref_d.items(): 38 | if i >= min_cov: 39 | valid_ref_count[ref_id]+=1 40 | 41 | logging.info("Select valid references for all samples") 42 | valid_ref_list = [] 43 | for ref_id, i in valid_ref_count.items(): 44 | if i == len(reads_index_list): 45 | valid_ref_list.append(ref_id) 46 | logging.info("Valid references: {}".format(len(valid_ref_list))) 47 | 48 | logging.info("Writing valid references to file") 49 | with open(ref_list, "w") as fp: 50 | for ref in valid_ref_list: 51 | fp.write("{}\n".format(ref)) 52 | 53 | except: 54 | logging.exception('Error while running min_ref_coverage') 55 | raise 56 | 57 | ##### RUN SCRIPT FUNCTION ##### 58 | 59 | min_ref_coverage( 60 | reads_index_list = snakemake.input.reads_index_list, 61 | ref_list = snakemake.output.ref_list, 62 | min_cov = snakemake.params.opt.get("min_cov", 0), 63 | log = snakemake.log[0]) 64 | -------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/mines_postprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | 5 | import logging 6 | import pandas as pd 7 | import numpy as np 8 | import datetime 9 | 10 | ##### DEFINE SCRIPT FUNCTION ##### 11 | def mines_postprocess (res_bed, res_tsv, log, min_cov): 12 | 13 | logging.basicConfig(filename=log, filemode="w", level=logging.INFO, format='%(message)s') 14 | logging.info("timestamp: {}".format(str(datetime.datetime.now()))) 15 | for i, j in locals().items(): 16 | logging.info("\t{}: {}\n".format(i,j)) 17 | 18 | logging.info("Loading data") 19 | df = pd.read_csv(res_bed, sep="\t", names=["refid", "pos", "end", "kmer", "unique key", "strand", "fraction modified", "coverage"], usecols=[0,1,3,6,7]) 20 | 21 | logging.info("Filtering out low coverage position") 22 | df = df[df["coverage"]>=min_cov] 23 | 24 | logging.info("Write output file") 25 | df.to_csv(res_tsv, index=False, 
sep="\t") 26 | 27 | ##### RUN SCRIPT FUNCTION ##### 28 | mines_postprocess ( 29 | res_bed=snakemake.input.res_bed, 30 | res_tsv=snakemake.output.res_tsv, 31 | log = snakemake.log[0], 32 | min_cov=snakemake.params.opt.get("min_cov", 30)) 33 | -------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/nanocompore_postprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | 5 | import logging 6 | import numpy as np 7 | import pandas as pd 8 | from collections import Counter, OrderedDict 9 | import pyfaidx 10 | from scipy.signal import find_peaks 11 | import datetime 12 | import os 13 | 14 | ##### DEFINE SCRIPT FUNCTION ##### 15 | def nanocompore_postprocess ( 16 | input_tsv, 17 | fasta, 18 | outdir, 19 | log, 20 | p_val_lim=0.01, 21 | quantile_lim=0.5, 22 | min_distance=9): 23 | 24 | logging.basicConfig(filename=log, filemode="w", level=logging.INFO, format='%(message)s') 25 | logging.info("timestamp: {}".format(str(datetime.datetime.now()))) 26 | for i, j in locals().items(): 27 | logging.info("\t{}: {}\n".format(i,j)) 28 | 29 | # Define variables 30 | sig_lim = -np.log10(p_val_lim) 31 | min_pval = np.nextafter(float(0), float(1)) 32 | max_pval = 1 33 | 34 | tests_d = { 35 | "GMM_logit_pvalue": {"peak":"GMM_logit_peak" ,"label":"GMM_context_0"}, 36 | "GMM_logit_pvalue_context_2": {"peak":"GMM_logit_peak_context_2", "label":"GMM_context_2"}, 37 | "KS_intensity_pvalue": {"peak":"KS_intensity_peak", "label":"KS_intensity_context_0"}, 38 | "KS_intensity_pvalue_context_2": {"peak":"KS_intensity_peak_context_2", "label":"KS_intensity_context_2"}, 39 | "KS_dwell_pvalue": {"peak":"KS_dwell_peak", "label":"KS_dwell_context_0"}, 40 | "KS_dwell_pvalue_context_2": {"peak":"KS_dwell_peakontext_2", "label":"KS_dwell_context_2"}} 41 | 42 | # Get transcript lengths in dict for convenience 43 | logging.info('Load Fasta reference 
lengths\n') 44 | with pyfaidx.Fasta(fasta) as fa: 45 | tx_len_dict = {i.name:len(i) for i in fa} 46 | 47 | # Get data and cleanup 48 | logging.info('Load and cleanup data\n') 49 | df = pd.read_csv(input_tsv, sep="\t", dtype={'chr':str}) 50 | 51 | test_sig_d = OrderedDict() 52 | logging.info('Iterate over transcripts and call peaks\n') 53 | 54 | for test in list(tests_d.keys()): 55 | c = Counter() 56 | test_label = tests_d[test]["label"] 57 | 58 | with open (os.path.join(outdir, f"nanocompore_results_{test_label}.tsv"), "w") as res_fp: 59 | res_fp.write("ref_id\tpos\tpvalue\tpeak\n") 60 | # Extract data for current test and cleanup 61 | test_df = df[["ref_id", "pos", test]].copy() 62 | test_df = test_df.rename(columns={test:"pvalue"}) 63 | test_df["pvalue"]= test_df["pvalue"].fillna(1) 64 | test_df["pvalue"] = test_df["pvalue"].clip(min_pval, max_pval) 65 | 66 | # Iterate over tx for peak calling 67 | for tx, tx_df in test_df.groupby("ref_id"): 68 | c["All transcripts"]+=1 69 | x = pd.Series(data=-np.log10(tx_df["pvalue"]).values, index=tx_df["pos"].values) 70 | x = x.reindex(range(tx_len_dict[tx])) 71 | x = x.fillna(0) 72 | sig_val = x[x>=sig_lim] 73 | 74 | if sig_val.empty: 75 | c["Transcripts without significant pvalues"]+=1 76 | else: 77 | c["Transcripts with significant pvalues"]+=1 78 | threshold = np.quantile(sig_val, quantile_lim) 79 | peaks = find_peaks(x, height=threshold, distance=min_distance)[0] 80 | if peaks.size == 0: 81 | c["Transcripts without peaks called"]+=1 82 | else: 83 | c["Transcripts with peaks called"]+=1 84 | 85 | # Write significant hits +- peaks 86 | for i in tx_df.itertuples(): 87 | if i.pvalue <=p_val_lim: 88 | c["Significant pvalues"]+=1 89 | if i.pos in peaks: 90 | c["Peaks detected"]+=1 91 | peak = True 92 | else: 93 | peak = False 94 | res_fp.write(f"{i.ref_id}\t{i.pos}\t{i.pvalue}\t{peak}\n") 95 | 96 | logging.info(f'{test_label} counts\n') 97 | for i, j in c.items(): 98 | logging.info(f'\t{i}:{j}\n') 99 | 100 | ##### RUN SCRIPT 
FUNCTION ##### 101 | 102 | nanocompore_postprocess ( 103 | input_tsv=snakemake.input.res_tsv, 104 | fasta=snakemake.input.fasta, 105 | outdir=os.path.dirname(snakemake.output[0]), 106 | log = snakemake.log[0], 107 | p_val_lim=snakemake.params.opt.get("p_val_lim", 0.01), 108 | quantile_lim=snakemake.params.opt.get("quantile_lim", 0.5), 109 | min_distance=snakemake.params.opt.get("min_distance", 9)) 110 | -------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/nanocompore_sampcomp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | from snakemake.shell import shell 5 | import os 6 | 7 | ##### RUN SCRIPT FUNCTION ##### 8 | test_tsv = ",".join(snakemake.input.test_tsv) 9 | control_tsv = ",".join(snakemake.input.control_tsv) 10 | fasta = snakemake.input.fasta 11 | outdir = os.path.dirname(snakemake.output.res_tsv) 12 | log = snakemake.log[0] 13 | threads = snakemake.threads 14 | opt = snakemake.params.opt 15 | 16 | shell("nanocompore sampcomp -t {threads} {opt} -w -1 {control_tsv} -2 {test_tsv} --label1 control --label2 test -f {fasta} -o {outdir} &> {log}") 17 | -------------------------------------------------------------------------------- /MetaCompore/workflow/scripts/tombo_postprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ##### Imports ##### 4 | 5 | import logging 6 | import numpy as np 7 | import h5py 8 | import pandas as pd 9 | from collections import Counter 10 | import pyfaidx 11 | from scipy.signal import find_peaks 12 | import datetime 13 | 14 | ##### DEFINE SCRIPT FUNCTION ##### 15 | def tombo_postprocess ( 16 | res_h5, 17 | fasta, 18 | res_tsv, 19 | log, 20 | min_cov=50, 21 | p_val_lim=0.01, 22 | quantile_lim=0.5, 23 | min_distance=9): 24 | 25 | logging.basicConfig(filename=log, filemode="w", level=logging.INFO, 
# -*- coding: utf-8 -*-
# File: /MetaCompore/workflow/scripts/tombo_postprocess.py

##### Imports #####

import logging
import numpy as np
import h5py
import pandas as pd
from collections import Counter
import pyfaidx
from scipy.signal import find_peaks
import datetime

##### DEFINE SCRIPT FUNCTION #####
def tombo_postprocess (
    res_h5,
    fasta,
    res_tsv,
    log,
    min_cov=50,
    p_val_lim=0.01,
    quantile_lim=0.5,
    min_distance=9):
    """
    Extract per-position p-values from a Tombo statistics hdf5 file, filter
    them and call p-value peaks per transcript.

    Every block found under `Statistic_Blocks` is read. Blocks whose reference
    was already processed, whose `start` attribute is > 0 or whose `strand`
    attribute is not "+" are counted and skipped. Positions passing the
    coverage filter and having a p-value <= `p_val_lim` are written to
    `res_tsv`, with a boolean `peak` column telling whether scipy's
    `find_peaks` selected that position.

    * res_h5: hdf5 file containing the `Statistic_Blocks` group
    * fasta: reference fasta file, opened with pyfaidx to get sequence lengths
    * res_tsv: output tab separated file (ref_id, pos, pvalue, peak)
    * log: path of the log file (overwritten)
    * min_cov: minimal value required in both the `cov` and `control_cov` fields
    * p_val_lim: maximal p-value for a position to be reported
    * quantile_lim: quantile of the significant -log10(p-values) of a
      transcript used as minimal peak height
    * min_distance: passed as `distance` to `find_peaks`

    Raises KeyError if a block reference is absent from the fasta file.
    """

    logging.basicConfig(filename=log, filemode="w", level=logging.INFO, format='%(message)s')
    logging.info("timestamp: {}".format(str(datetime.datetime.now())))
    # Must stay before any other local is defined so that only the options are logged
    for i, j in locals().items():
        logging.info("\t{}: {}\n".format(i,j))

    # Define variables
    sig_lim = -np.log10(p_val_lim)
    # Smallest positive float, so that -log10 never receives 0
    min_pval = np.nextafter(float(0), float(1))
    max_pval = 1

    # Init collections
    tx_id_set=set()
    c = Counter()

    # Get transcript lengths in dict for convenience
    logging.info('Load Fasta reference lengths\n')
    with pyfaidx.Fasta(fasta) as fa:
        tx_len_dict = {i.name:len(i) for i in fa}

    logging.info('Extract data from hdf5 database\n')
    with h5py.File(res_h5,'r') as h5, open(res_tsv, "w") as res_fp:
        res_fp.write("ref_id\tpos\tpvalue\tpeak\n")

        for block_data in h5["Statistic_Blocks"].values():

            # Extract attrs
            tx_id = block_data.attrs['chrm']
            start = block_data.attrs['start']
            strand = block_data.attrs['strand']

            if tx_id in tx_id_set:
                c["Duplicated transcript"]+=1
            elif start > 0:
                c["Transcript with invalid start"]+=1
            elif strand != "+":
                c["Transcript with invalid strand"]+=1
            else:
                tx_df = pd.DataFrame(block_data.get("block_stats")[()])
                tx_df = tx_df.dropna()
                tx_df = tx_df[(tx_df["cov"]>=min_cov) & (tx_df["control_cov"]>=min_cov)]
                if tx_df.empty:
                    c["Low coverage transcripts discarded"]+=1
                    continue

                tx_df.rename(columns={"stat":"pvalue"}, inplace=True)
                tx_df["pvalue"] = tx_df["pvalue"].fillna(1)
                tx_df["pvalue"] = np.clip(tx_df["pvalue"], min_pval, max_pval)

                # Peak calling in -log10 space
                c["All transcripts"]+=1
                x = pd.Series(data=-np.log10(tx_df["pvalue"]).values, index=tx_df["pos"].values)
                x = x.reindex(range(tx_len_dict[tx_id]))
                x = x.fillna(0)
                # >= mirrors the `pvalue <= p_val_lim` filter used when writing, so a
                # p-value exactly at the limit is handled the same way in both places
                sig_val = x[x>=sig_lim]

                # Reset for every transcript so that the write loop below can never
                # see peaks called for a previous transcript (or an undefined name)
                peak_pos = set()

                if sig_val.empty:
                    c["Transcripts without significant pvalues"]+=1
                else:
                    c["Transcripts with significant pvalues"]+=1
                    threshold = np.quantile(sig_val, quantile_lim)
                    peaks = find_peaks(x, height=threshold, distance=min_distance)[0]
                    peak_pos = set(peaks.tolist())

                # Write significant hits +- peaks
                # NOTE(review): the nesting of this loop and of the `tx_id_set.add`
                # below was reconstructed from a whitespace-collapsed source; confirm
                # against the original script
                for i in tx_df.itertuples():
                    if i.pvalue <= p_val_lim:
                        c["Significant pvalues"]+=1
                        peak = i.pos in peak_pos
                        if peak:
                            c["Peaks detected"]+=1
                        res_fp.write(f"{tx_id}\t{i.pos}\t{i.pvalue}\t{peak}\n")

                tx_id_set.add(tx_id)

    logging.info('Counts\n')
    for i, j in c.items():
        logging.info(f'\t{i}:{j}\n')

##### RUN SCRIPT FUNCTION #####
tombo_postprocess (
    res_h5=snakemake.input.res_h5,
    fasta=snakemake.input.fasta,
    res_tsv=snakemake.output.res_tsv,
    log=snakemake.log[0],
    min_cov=snakemake.params.opt.get("min_cov", 30),
    p_val_lim=snakemake.params.opt.get("p_val_lim", 0.01),
    quantile_lim=snakemake.params.opt.get("quantile_lim", 0.5),
    min_distance=snakemake.params.opt.get("min_distance", 9))
# -*- coding: utf-8 -*-
# File: /MetaCompore/workflow/scripts/tombo_preprocess.py
# Snakemake script wrapper chaining four command line steps:
# fast5_subset, multi_to_single_fast5, tombo preprocess annotate_raw_with_fastqs
# and tombo resquiggle. A header line is echoed to the log before each step.

##### Imports #####
from snakemake.shell import shell
import tempfile

##### RUN SCRIPT FUNCTION #####
# The {placeholders} used in the shell commands are named after these variables,
# so the names must not be changed without updating the command strings.
input_fast5_dir = snakemake.input.fast5_dir
fastq = snakemake.input.fastq
fasta = snakemake.input.fasta
selected_reads_fn = snakemake.input.selected_reads_fn
output_fast5_dir = snakemake.output.fast5_dir
log = snakemake.log[0]
threads = snakemake.threads

# temp_dir is given to fast5_subset with -s and to multi_to_single_fast5 with -i;
# the directory is deleted when the `with` block exits.
# NOTE(review): indentation was reconstructed from a whitespace-collapsed source;
# the tombo commands do not reference temp_dir and may sit outside the block — confirm.
with tempfile.TemporaryDirectory() as temp_dir:

    # The first echo uses `>` and therefore truncates the log file;
    # every later command appends to it with `>>` / `&>>`
    shell("echo '## fast5_subset ##' > {log}")
    shell("fast5_subset -t {threads} -i {input_fast5_dir} -s {temp_dir} -l {selected_reads_fn} -r &>> {log}")

    shell("echo '## multi_to_single_fast5 ##' >> {log}")
    shell("multi_to_single_fast5 -t {threads} -i {temp_dir} -s {output_fast5_dir} &>> {log}")

    # The two tombo commands operate directly on output_fast5_dir
    shell("echo '## tombo preprocess annotate_raw_with_fastqs ##' >> {log}")
    shell("tombo preprocess annotate_raw_with_fastqs --fast5-basedir {output_fast5_dir} --fastq-filenames {fastq} --overwrite --processes {threads} &>> {log}")

    shell("echo '## tombo resquiggle ##' >> {log}")
    shell("tombo resquiggle --rna --overwrite --processes {threads} {output_fast5_dir} {fasta} &>> {log}")
shell("multi_to_single_fast5 -t {threads} -i {temp_dir} -s {output_fast5_dir} &>> {log}") 23 | 24 | shell("echo '## tombo preprocess annotate_raw_with_fastqs ##' >> {log}") 25 | shell("tombo preprocess annotate_raw_with_fastqs --fast5-basedir {output_fast5_dir} --fastq-filenames {fastq} --overwrite --processes {threads} &>> {log}") 26 | 27 | shell("echo '## tombo resquiggle ##' >> {log}") 28 | shell("tombo resquiggle --rna --overwrite --processes {threads} {output_fast5_dir} {fasta} &>> {log}") 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MetaCompore v0.1.3 2 | 3 | [![Snakemake](https://img.shields.io/badge/snakemake-≥5.30.1-brightgreen.svg)](https://snakemake.bitbucket.io) 4 | [![DOI](https://zenodo.org/badge/312304999.svg)](https://zenodo.org/badge/latestdoi/312304999) 5 | 6 | 7 | --- 8 | 9 | **MetaCompore is a Snakemake pipeline that runs multiple RNA modification detection tools on nanopore direct RNA sequencing data** 10 | 11 | At the moment MetaCompore supports the following tools: 12 | * NanoCompore v1.03: https://github.com/tleonardi/nanocompore/ 13 | * Epinano v1.02: https://github.com/enovoa/EpiNano 14 | * Eligos2 v2.0.0: https://gitlab.com/piroonj/eligos2 15 | * Tombo v1.5.1: https://github.com/nanoporetech/tombo 16 | * MINES: https://github.com/YeoLab/MINES 17 | * differr_nanopore_DRS: https://github.com/bartongroup/differr_nanopore_DRS 18 | 19 | ## Authors 20 | 21 | * Adrien Leger (@a-slide) 22 | * Tommaso Leonardi (@tleonardi) 23 | 24 | ## Usage 25 | 26 | ### Step 1: Obtain a copy of this workflow 27 | 28 | Download and extract the latest release tarball of the pipeline into the location on your local system where you want to perform the data analysis: 29 | 30 | ``` 31 | wget https://github.com/a-slide/MetaCompore/releases/download/0.1.3/MetaCompore.tar.gz 32 | tar xzf MetaCompore.tar.gz 33 | cd MetaCompore 34 | ``` 35 | 36 | ### Step
2: Install dependencies 37 | 38 | #### Singularity 39 | 40 | If required, install singularity following the official documentation: https://sylabs.io/guides/3.7/user-guide/quick_start.html 41 | 42 | #### Conda / Mamba 43 | 44 | Install miniconda following the official documentation: https://docs.conda.io/en/latest/miniconda.html 45 | 46 | You can also install mamba to speed up snakemake installation: https://github.com/mamba-org/mamba 47 | 48 | #### Snakemake 49 | 50 | Create a virtual environment containing snakemake with [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html): 51 | ``` 52 | conda env create -f environment.yaml 53 | ``` 54 | 55 | You can also use [mamba](https://github.com/mamba-org/mamba), which gives the same result but much faster: 56 | 57 | ``` 58 | mamba env create -f environment.yaml 59 | ``` 60 | 61 | ### Step 3: Configure the workflow 62 | 63 | Configure the workflow execution according to your needs by editing the file `config.yaml`: 64 | 65 | ``` 66 | nano config.yaml 67 | ``` 68 | 69 | Edit the file `samples.tsv` to specify your sample setup and fast5 source files: 70 | 71 | ``` 72 | nano samples.tsv 73 | ``` 74 | 75 | ### Step 4: Execute workflow 76 | 77 | #### Local mode 78 | 79 | Activate the conda environment and run the workflow: 80 | 81 | ``` 82 | conda activate snakemake 83 | snakemake --use-singularity -j 4 84 | ``` 85 | 86 | #### LSF cluster mode 87 | 88 | Set up an LSF cluster profile: https://github.com/Snakemake-Profiles/lsf 89 | 90 | Edit the LSF rule-specific config file `lsf.yaml` 91 | 92 | 93 | ## Disclaimer 94 | 95 | Please be aware that MetaCompore is a research package that is still under development. 96 | 97 | It was tested under Linux Ubuntu 16.04 and in an HPC environment running under Red Hat Enterprise 7.1. 98 | 99 | Thank you 100 | 101 | ## Citation 102 | 103 | Adrien Leger & Tommaso Leonardi. (2021, April 28). MetaCompore. Zenodo.
http://doi.org/10.5281/zenodo.4726171 104 | 105 | ## licence 106 | 107 | MIT (https://mit-license.org/) 108 | 109 | Copyright © 2020 Adrien Leger 110 | 111 | ## Authors 112 | 113 | * Adrien Leger / contact@adrienleger.com / https://adrienleger.com 114 | -------------------------------------------------------------------------------- /versipy.yaml: -------------------------------------------------------------------------------- 1 | version: 2 | major: 0 3 | minor: 1 4 | micro: 3 5 | a: null 6 | b: null 7 | rc: null 8 | post: null 9 | dev: null 10 | managed_values: 11 | __package_name__: MetaCompore 12 | __package_description__: Metacompore is a snakemake pipeline running multiple RNA 13 | modifications detection tools for nanopore directRNA sequencing 14 | __package_url__: https://github.com/a-slide/MetaCompore 15 | __package_licence__: MIT 16 | __package_licence_url__: https://mit-license.org/ 17 | __author_name__: Adrien Leger 18 | __author_email__: contact@adrienleger.com 19 | __author_url__: https://adrienleger.com 20 | __citation__: Adrien Leger & Tommaso Leonardi. (2021, April 28). MetaCompore. Zenodo. 21 | http://doi.org/10.5281/zenodo.4726171 22 | managed_files: 23 | .versipy/README.md: README.md 24 | -------------------------------------------------------------------------------- /versipy_history.txt: -------------------------------------------------------------------------------- 1 | 2021-02-04 12:59:16.607824 0.0.0 Initialise versipy history 2 | 2021-02-04 13:21:26.939922 0.1.0 first versipy assisted release 3 | 2021-02-04 15:09:17.429634 0.1.1 readme update 4 | 2021-02-22 09:14:09.983599 0.1.2 Add epinano support 5 | 2021-04-28 22:59:16.101216 0.1.3 Update readme and release new version -v 6 | --------------------------------------------------------------------------------