├── docs ├── requirements.txt ├── source │ ├── _build │ │ ├── objects.inv │ │ ├── _static │ │ │ ├── file.png │ │ │ ├── plus.png │ │ │ ├── minus.png │ │ │ ├── css │ │ │ │ ├── fonts │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-normal.woff │ │ │ │ │ ├── lato-normal.woff2 │ │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ │ └── lato-normal-italic.woff2 │ │ │ │ └── badge_only.css │ │ │ ├── graphviz.css │ │ │ ├── documentation_options.js │ │ │ ├── js │ │ │ │ ├── badge_only.js │ │ │ │ ├── html5shiv.min.js │ │ │ │ ├── html5shiv-printshiv.min.js │ │ │ │ └── theme.js │ │ │ ├── pygments.css │ │ │ ├── _sphinx_javascript_frameworks_compat.js │ │ │ ├── doctools.js │ │ │ ├── sphinx_highlight.js │ │ │ ├── language_data.js │ │ │ ├── underscore.js │ │ │ └── basic.css │ │ ├── .doctrees │ │ │ ├── index.doctree │ │ │ ├── usage.doctree │ │ │ ├── environment.pickle │ │ │ └── autoapi │ │ │ │ ├── index.doctree │ │ │ │ └── evoaug │ │ │ │ ├── index.doctree │ │ │ │ ├── augment │ │ │ │ └── index.doctree │ │ │ │ └── evoaug │ │ │ │ └── index.doctree │ │ ├── .buildinfo │ │ ├── _sources │ │ │ ├── autoapi │ │ │ │ ├── index.rst.txt │ │ │ │ └── evoaug │ │ │ │ │ ├── index.rst.txt │ │ │ │ │ ├── evoaug │ │ │ │ │ └── index.rst.txt │ │ │ │ │ └── augment │ │ │ │ │ └── index.rst.txt │ │ │ ├── index.rst.txt │ │ │ └── usage.rst.txt │ │ ├── search.html │ │ ├── py-modindex.html │ │ ├── autoapi │ │ │ ├── index.html │ │ │ └── evoaug │ │ │ │ ├── index.html │ │ │ │ └── evoaug │ │ │ │ └── index.html │ │ ├── index.html │ │ ├── searchindex.js │ │ ├── genindex.html │ │ └── usage.html │ ├── index.rst │ ├── conf.py │ └── usage.rst ├── Makefile └── make.bat ├── fig ├── overview.png └── augmentations.png ├── evoaug ├── __init__.py ├── evoaug.py └── augment.py ├── setup.py ├── LICENSE ├── .gitignore └── README.md /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-autoapi 2 | 3 | -------------------------------------------------------------------------------- /fig/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/fig/overview.png -------------------------------------------------------------------------------- /fig/augmentations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/fig/augmentations.png -------------------------------------------------------------------------------- /docs/source/_build/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/objects.inv -------------------------------------------------------------------------------- /docs/source/_build/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/file.png -------------------------------------------------------------------------------- /docs/source/_build/_static/plus.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/plus.png -------------------------------------------------------------------------------- /docs/source/_build/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/minus.png -------------------------------------------------------------------------------- /docs/source/_build/.doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/.doctrees/index.doctree -------------------------------------------------------------------------------- /docs/source/_build/.doctrees/usage.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/.doctrees/usage.doctree -------------------------------------------------------------------------------- /docs/source/_build/.doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/.doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/source/_build/.doctrees/autoapi/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/.doctrees/autoapi/index.doctree -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-normal.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/source/_build/.doctrees/autoapi/evoaug/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/.doctrees/autoapi/evoaug/index.doctree -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/fontawesome-webfont.woff2 
-------------------------------------------------------------------------------- /docs/source/_build/_static/css/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/_static/css/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/source/_build/.doctrees/autoapi/evoaug/augment/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/.doctrees/autoapi/evoaug/augment/index.doctree -------------------------------------------------------------------------------- /docs/source/_build/.doctrees/autoapi/evoaug/evoaug/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-koo/evoaug/HEAD/docs/source/_build/.doctrees/autoapi/evoaug/evoaug/index.doctree -------------------------------------------------------------------------------- /evoaug/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EvoAug is a PyTorch package to pretrain sequence-based deep learning models for 3 | regulatory genomics data with evolution-inspired data augmentations followed by a 4 | finetuning on the original, unperturbed sequence data. 5 | """ 6 | -------------------------------------------------------------------------------- /docs/source/_build/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 3197109dbe0186203cacd39c7bdaea47 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/source/_build/_sources/autoapi/index.rst.txt: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | This page contains auto-generated API reference documentation [#f1]_. 5 | 6 | .. toctree:: 7 | :titlesonly: 8 | 9 | /autoapi/evoaug/index 10 | 11 | .. [#f1] Created with `sphinx-autoapi `_ -------------------------------------------------------------------------------- /docs/source/_build/_static/graphviz.css: -------------------------------------------------------------------------------- 1 | /* 2 | * graphviz.css 3 | * ~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- graphviz extension. 6 | * 7 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | 12 | img.graphviz { 13 | border: 0; 14 | max-width: 100%; 15 | } 16 | 17 | object.graphviz { 18 | max-width: 100%; 19 | } 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name="evoaug", 6 | version="1.0.6", 7 | packages=find_packages(), 8 | description = "A Python package that trains models with evolution-inspired data augmentations. 
", 9 | python_requires=">=3.6", 10 | install_requires=[ 11 | "lightning>=2.0.0", 12 | "torch>=1.12.0", 13 | "numpy>=1.21.0" 14 | ], 15 | ) 16 | -------------------------------------------------------------------------------- /docs/source/_build/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.1', 4 | LANGUAGE: 'en', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false, 12 | SHOW_SEARCH_SUMMARY: true, 13 | ENABLE_SEARCH_SHORTCUTS: true, 14 | }; -------------------------------------------------------------------------------- /docs/source/_build/_sources/autoapi/evoaug/index.rst.txt: -------------------------------------------------------------------------------- 1 | :py:mod:`evoaug` 2 | ================ 3 | 4 | .. py:module:: evoaug 5 | 6 | .. autoapi-nested-parse:: 7 | 8 | EvoAug is a PyTorch package to pretrain sequence-based deep learning models for 9 | regulatory genomics data with evolution-inspired data augmentations followed by a 10 | finetuning on the original, unperturbed sequence data. 11 | 12 | 13 | 14 | Submodules 15 | ---------- 16 | .. toctree:: 17 | :titlesonly: 18 | :maxdepth: 1 19 | 20 | augment/index.rst 21 | evoaug/index.rst 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to EvoAug's documentation! 2 | =================================== 3 | 4 | **EvoAug** is a Python library to train PyTorch 5 | models on regulatory genomics data with evolution-inspired 6 | data augmentations. 7 | 8 | Check out the :doc:`usage` for further information, including 9 | how to install the project. 10 | 11 | .. note:: 12 | 13 | This project is under active development. Source code can be found `here `_. 14 | 15 | Contents 16 | -------- 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | 21 | usage 22 | -------------------------------------------------------------------------------- /docs/source/_build/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | Welcome to EvoAug's documentation! 2 | =================================== 3 | 4 | **EvoAug** is a Python library to train PyTorch 5 | models on regulatory genomics data with evolution-inspired 6 | data augmentations. 7 | 8 | Check out the :doc:`usage` for further information, including 9 | how to install the project. 10 | 11 | .. note:: 12 | 13 | This project is under active development. Source code can be found `here `_. 14 | 15 | Contents 16 | -------- 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | 21 | usage 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/_build/_static/js/badge_only.js: -------------------------------------------------------------------------------- 1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | 3 | # -- Project information 4 | 5 | project = 'EvoAug' 6 | copyright = '2021, KooLab' 7 | author = 'KooLab' 8 | 9 | release = '0.1' 10 | version = '0.1.1' 11 | 12 | # -- General configuration 13 | 14 | extensions = [ 15 | 'sphinx.ext.duration', 16 | 'sphinx.ext.doctest', 17 | 'sphinx.ext.autodoc', 18 | 'sphinx.ext.autosummary', 19 | 'sphinx.ext.intersphinx', 20 | 'sphinx.ext.napoleon', 21 | 'autoapi.extension', 22 | ] 23 | 24 | autoapi_type = 'python' 25 | autoapi_dirs = ['../../evoaug'] 26 | 27 | intersphinx_mapping = { 28 | 'python': ('https://docs.python.org/3/', None), 29 | 'torch': ('https://pytorch.org/docs/stable', None), 30 | } 31 | intersphinx_disabled_domains = ['std'] 32 | 33 | templates_path = ['_templates'] 34 | 35 | # -- Options for HTML output 36 | 37 | html_theme = 'sphinx_rtd_theme' 38 | 39 | # -- Options for EPUB output 40 | epub_show_urls = 'footnote' 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Peter K. Koo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | .DS_Store 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | docs/build/ 68 | docs/source/generated/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | -------------------------------------------------------------------------------- /docs/source/_build/_static/js/html5shiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var 
t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/source/_build/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge 
.icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | .. _installation: 5 | 6 | Installation 7 | ------------ 8 | 9 | To use EvoAug, first install it using pip: 10 | 11 | .. code-block:: console 12 | 13 | pip install evoaug 14 | 15 | Example 16 | ------- 17 | 18 | Import evoaug: 19 | 20 | .. code-block:: python 21 | 22 | from evoaug import evoaug, augment 23 | import lightning.pytorch as pl 24 | 25 | 26 | Define PyTorch model and modeling choices: 27 | 28 | .. code-block:: python 29 | 30 | model = "DEFINE PYTORCH MODEL" 31 | loss = "DEFINE PYTORCH LOSS" 32 | optimizer_dict = "DEFINE OPTIMIZER OR OPTIMIZER DICT" 33 | ckpt_aug_path = "path-to-aug-checkpoint.ckpt" 34 | ckpt_finetune_path = "path-to-finetune-checkpoint.ckpt" 35 | 36 | 37 | Train model with augmentations: 38 | 39 | .. code-block:: python 40 | 41 | augment_list = [ 42 | augment.RandomDeletion(delete_min=0, delete_max=20), 43 | augment.RandomRC(rc_prob=0.5), 44 | augment.RandomInsertion(insert_min=0, insert_max=20), 45 | augment.RandomTranslocation(shift_min=0, shift_max=20), 46 | augment.RandomMutation(mut_frac=0.05), 47 | augment.RandomNoise(noise_mean=0, noise_std=0.2), 48 | ] 49 | 50 | robust_model = evoaug.RobustModel( 51 | model, 52 | criterion=loss, 53 | optimizer=optimizer_dict, 54 | augment_list=augment_list, 55 | max_augs_per_seq=2, # maximum number of augmentations per sequence 56 | hard_aug=True, # use max_augs_per_seq, otherwise sample randomly up to max 57 | inference_aug=False, # if true, keep augmentations on during inference time 58 | ) 59 | 60 | # set up callback 61 | callback_topmodel = pl.callbacks.ModelCheckpoint( 62 | monitor="val_loss", save_top_k=1, dirpath=output_dir, filename=ckpt_aug_path 63 | ) 64 | 65 | # train model 66 | trainer = pl.Trainer( 67 | accelerator="gpu", 68 | devices=1, 69 | max_epochs=100, 70 | logger=None, 71 | callbacks=["ADD CALLBACKS", "callback_topmodel"], 72 | ) 73 | 74 | # pre-train model with augmentations 75 | trainer.fit(robust_model, datamodule=data_module) 76 | 77 | # load best model 78 | robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_aug_path) 79 | 80 | 81 | Fine-tune model without augmentations: 82 | 83 | .. 
code-block:: python 84 | 85 | # set up fine-tuning 86 | robust_model.finetune = True 87 | robust_model.optimizer = "set up optimizer for fine-tuning" 88 | 89 | # set up callback 90 | callback_topmodel = pl.callbacks.ModelCheckpoint( 91 | monitor="val_loss", 92 | save_top_k=1, 93 | dirpath=output_dir, 94 | filename=ckpt_finetune_path, 95 | ) 96 | 97 | # set up pytorch lightning trainer 98 | trainer = pl.Trainer( 99 | accelerator="gpu", 100 | devices=1, 101 | max_epochs=100, 102 | logger=None, 103 | callbacks=["ADD CALLBACKS", "callback_topmodel"], 104 | ) 105 | 106 | # fine-tune model 107 | trainer.fit(robust_model, datamodule=data_module) 108 | 109 | # load best fine-tuned model 110 | robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_finetune_path) 111 | 112 | 113 | 114 | Examples on Google Colab 115 | ------------------------ 116 | 117 | DeepSTARR analysis: 118 | 119 | .. code-block:: python 120 | 121 | https://colab.research.google.com/drive/1a2fiRPBd1xvoJf0WNiMUgTYiLTs1XETf?usp=sharing 122 | 123 | 124 | ChIP-seq analysis: 125 | 126 | .. code-block:: python 127 | 128 | https://colab.research.google.com/drive/1GZ8v4Tq3LQMZI30qvdhF7ZW6Kf5GDyKX?usp=sharing 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /docs/source/_build/_sources/usage.rst.txt: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | .. _installation: 5 | 6 | Installation 7 | ------------ 8 | 9 | To use EvoAug, first install it using pip: 10 | 11 | .. code-block:: console 12 | 13 | pip install evoaug 14 | 15 | Example 16 | ------- 17 | 18 | Import evoaug: 19 | 20 | .. code-block:: python 21 | 22 | from evoaug import evoaug, augment 23 | import pytorch_lightning as pl 24 | 25 | 26 | Define PyTorch model and modeling choices: 27 | 28 | .. code-block:: python 29 | 30 | model = "DEFINE PYTORCH MODEL" 31 | loss = "DEFINE PYTORCH LOSS" 32 | optimizer_dict = "DEFINE OPTIMIZER OR OPTIMIZER DICT" 33 | ckpt_aug_path = "path-to-aug-checkpoint.ckpt" 34 | ckpt_finetune_path = "path-to-finetune-checkpoint.ckpt" 35 | 36 | 37 | Train model with augmentations: 38 | 39 | .. 
code-block:: python 40 | 41 | augment_list = [ 42 | augment.RandomDeletion(delete_min=0, delete_max=20), 43 | augment.RandomRC(rc_prob=0.5), 44 | augment.RandomInsertion(insert_min=0, insert_max=20), 45 | augment.RandomTranslocation(shift_min=0, shift_max=20), 46 | augment.RandomMutation(mut_frac=0.05), 47 | augment.RandomNoise(noise_mean=0, noise_std=0.2), 48 | ] 49 | 50 | robust_model = evoaug.RobustModel( 51 | model, 52 | criterion=loss, 53 | optimizer=optimizer_dict, 54 | augment_list=augment_list, 55 | max_augs_per_seq=2, # maximum number of augmentations per sequence 56 | hard_aug=True, # use max_augs_per_seq, otherwise sample randomly up to max 57 | inference_aug=False, # if true, keep augmentations on during inference time 58 | ) 59 | 60 | # set up callback 61 | callback_topmodel = pl.callbacks.ModelCheckpoint( 62 | monitor="val_loss", save_top_k=1, dirpath=output_dir, filename=ckpt_aug_path 63 | ) 64 | 65 | # train model 66 | trainer = pl.Trainer( 67 | gpus=1, 68 | max_epochs=100, 69 | auto_select_gpus=True, 70 | logger=None, 71 | callbacks=["ADD CALLBACKS", "callback_topmodel"], 72 | ) 73 | 74 | # pre-train model with augmentations 75 | trainer.fit(robust_model, datamodule=data_module) 76 | 77 | # load best model 78 | robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_aug_path) 79 | 80 | 81 | Fine-tune model without augmentations: 82 | 83 | .. code-block:: python 84 | 85 | # set up fine-tuning 86 | robust_model.finetune = True 87 | robust_model.optimizer = "set up optimizer for fine-tuning" 88 | 89 | # set up callback 90 | callback_topmodel = pl.callbacks.ModelCheckpoint( 91 | monitor="val_loss", 92 | save_top_k=1, 93 | dirpath=output_dir, 94 | filename=ckpt_finetune_path, 95 | ) 96 | 97 | # set up pytorch lightning trainer 98 | trainer = pl.Trainer( 99 | gpus=1, 100 | max_epochs=100, 101 | auto_select_gpus=True, 102 | logger=None, 103 | callbacks=["ADD CALLBACKS", "callback_topmodel"], 104 | ) 105 | 106 | # fine-tune model 107 | trainer.fit(robust_model, datamodule=data_module) 108 | 109 | # load best fine-tuned model 110 | robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_finetune_path) 111 | 112 | 113 | 114 | Examples on Google Colab 115 | ------------------------ 116 | 117 | DeepSTARR analysis: 118 | 119 | .. code-block:: python 120 | 121 | https://colab.research.google.com/drive/1a2fiRPBd1xvoJf0WNiMUgTYiLTs1XETf?usp=sharing 122 | 123 | 124 | ChIP-seq analysis: 125 | 126 | .. code-block:: python 127 | 128 | https://colab.research.google.com/drive/1GZ8v4Tq3LQMZI30qvdhF7ZW6Kf5GDyKX?usp=sharing 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /docs/source/_build/_sources/autoapi/evoaug/evoaug/index.rst.txt: -------------------------------------------------------------------------------- 1 | :py:mod:`evoaug.evoaug` 2 | ======================= 3 | 4 | .. py:module:: evoaug.evoaug 5 | 6 | .. autoapi-nested-parse:: 7 | 8 | Model (implemented in Pytorch Lightning) demonstrating how to use augmentations 9 | during training. 10 | 11 | 12 | 13 | Module Contents 14 | --------------- 15 | 16 | Classes 17 | ~~~~~~~ 18 | 19 | .. autoapisummary:: 20 | 21 | evoaug.evoaug.RobustModel 22 | 23 | 24 | 25 | Functions 26 | ~~~~~~~~~ 27 | 28 | .. autoapisummary:: 29 | 30 | evoaug.evoaug.load_model_from_checkpoint 31 | evoaug.evoaug.augment_max_len 32 | 33 | 34 | 35 | .. 
py:class:: RobustModel(model, criterion, optimizer, augment_list=[], max_augs_per_seq=0, hard_aug=True, finetune=False, inference_aug=False) 36 | 37 | Bases: :py:obj:`pytorch_lightning.core.lightning.LightningModule` 38 | 39 | PyTorch Lightning module to specify how augmentation should be applied to a model. 40 | 41 | :param model: PyTorch model. 42 | :type model: torch.nn.Module 43 | :param criterion: PyTorch loss function 44 | :type criterion: callable 45 | :param optimizer: PyTorch optimizer as a class or dictionary 46 | :type optimizer: torch.optim.Optimizer or dict 47 | :param augment_list: List of data augmentations, each a callable class from augment.py. 48 | Default is empty list -- no augmentations. 49 | :type augment_list: list 50 | :param max_augs_per_seq: Maximum number of augmentations to apply to each sequence. Value is superceded by the number of augmentations in augment_list. 51 | :type max_augs_per_seq: int 52 | :param hard_aug: Flag to set a hard number of augmentations, otherwise the number of augmentations is set randomly up to max_augs_per_seq, default is True. 53 | :type hard_aug: bool 54 | :param finetune: Flag to turn off augmentations during training, default is False. 55 | :type finetune: bool 56 | :param inference_aug: Flag to turn on augmentations during inference, default is False. 57 | :type inference_aug: bool 58 | 59 | .. py:method:: forward(x) 60 | 61 | Standard forward pass. 62 | 63 | 64 | .. py:method:: configure_optimizers() 65 | 66 | Standard optimizer configuration. 67 | 68 | 69 | .. py:method:: training_step(batch, batch_idx) 70 | 71 | Training step with augmentations. 72 | 73 | 74 | .. py:method:: validation_step(batch, batch_idx) 75 | 76 | Validation step without (or with) augmentations. 77 | 78 | 79 | .. py:method:: test_step(batch, batch_idx) 80 | 81 | Test step without (or with) augmentations. 82 | 83 | 84 | .. py:method:: predict_step(batch, batch_idx) 85 | 86 | Prediction step without (or with) augmentations. 87 | 88 | 89 | .. py:method:: _sample_aug_combos(batch_size) 90 | 91 | Set the number of augmentations and randomly select augmentations to apply 92 | to each sequence. 93 | 94 | 95 | .. py:method:: _apply_augment(x) 96 | 97 | Apply augmentations to each sequence in batch, x. 98 | 99 | 100 | .. py:method:: _pad_end(x) 101 | 102 | Add random DNA padding of length insert_max to the end of each sequence in batch. 103 | 104 | 105 | .. py:method:: finetune_mode(optimizer=None) 106 | 107 | Turn on finetune flag -- no augmentations during training. 108 | 109 | 110 | 111 | .. py:function:: load_model_from_checkpoint(model, checkpoint_path) 112 | 113 | Load PyTorch lightning model from checkpoint. 114 | 115 | :param model: RobustModel instance. 116 | :type model: RobustModel 117 | :param checkpoint_path: path to checkpoint of model weights 118 | :type checkpoint_path: str 119 | 120 | :returns: Object with weights and config loaded from checkpoint. 121 | :rtype: RobustModel 122 | 123 | 124 | .. py:function:: augment_max_len(augment_list) 125 | 126 | Determine whether insertions are applied to determine the insert_max, 127 | which will be applied to pad other sequences with random DNA. 128 | 129 | :param augment_list: List of augmentations. 130 | :type augment_list: list 131 | 132 | :returns: Value for insert max. 
133 | :rtype: int 134 | 135 | 136 | -------------------------------------------------------------------------------- /docs/source/_build/_static/js/html5shiv-printshiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in 
a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/source/_build/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Search — EvoAug 0.1 documentation 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EvoAug 2 | 3 | EvoAug is a PyTorch package to pretrain sequence-based deep learning models for regulatory genomics data with evolution-inspired data augmentations followed by a finetuning on the original, unperturbed sequence data. It uses a PyTorch Lightning LightningModule to define a model wrapper that is used for training. This work directly follows from "EvoAug: improving generalization and interpretability of genomic deep neural networks with evolution-inspired data augmentations" by Nicholas Keone Lee, Ziqi (Amber) Tang, Shushan Toneyan, and Peter K Koo. Code in this repository is shared under the MIT License. For additional information, see the documentation on [EvoAug.ReadTheDocs.io](https://evoaug.readthedocs.io/en/latest/index.html). 4 | 5 | For questions, email: koo@cshl.edu 6 | 7 | fig 8 | 9 | overview 10 | 11 | 12 | 13 | #### Install: 14 | 15 | ``` 16 | pip install evoaug 17 | ``` 18 | 19 | 20 | #### Dependencies: 21 | 22 | ``` 23 | torch 1.12.1+cu113 24 | lightning >= 2.0.0 25 | numpy 1.21.6 26 | ``` 27 | 28 | Note: This package has been updated to use the newer `lightning` package instead of `pytorch_lightning`. For older versions that use `pytorch_lightning`, the `pl.Trainer` call will need to be modified accordingly, as the GPU-related arguments have changed since version 1.7. 29 | 30 | #### Example 31 | 32 | ```python 33 | from evoaug import evoaug, augment 34 | import lightning.pytorch as pl 35 | 36 | model = "DEFINE PYTORCH MODEL" 37 | loss = "DEFINE PYTORCH LOSS" 38 | optimizer_dict = "DEFINE OPTIMIZER OR OPTIMIZER DICT" 39 | 40 | augment_list = [ 41 | augment.RandomDeletion(delete_min=0, delete_max=20), 42 | augment.RandomRC(rc_prob=0.5), 43 | augment.RandomInsertion(insert_min=0, insert_max=20), 44 | augment.RandomTranslocation(shift_min=0, shift_max=20), 45 | augment.RandomMutation(mut_frac=0.05), 46 | augment.RandomNoise(noise_mean=0, noise_std=0.2), 47 | ] 48 | 49 | robust_model = evoaug.RobustModel( 50 | model, 51 | criterion=loss, 52 | optimizer=optimizer_dict, 53 | augment_list=augment_list, 54 | max_augs_per_seq=2, # maximum number of augmentations per sequence 55 | hard_aug=True, # use max_augs_per_seq, otherwise sample randomly up to max 56 | inference_aug=False # if true, keep augmentations on during inference time 57 | ) 58 | 59 | # set up callback 60 | callback_topmodel = pl.callbacks.ModelCheckpoint( 61 | monitor='val_loss', 62 | save_top_k=1, 63 | dirpath=output_dir, 64 | filename=ckpt_aug_path 65 | ) 66 | 67 | # train model 68 | trainer = pl.Trainer( 69 | accelerator="gpu", 70 | devices=1, 71 | max_epochs=100, 72 | logger=None, 73 | callbacks=["ADD CALLBACKS", callback_topmodel] 74 | ) 75 | 76 | # pre-train model with augmentations 77 | trainer.fit(robust_model, datamodule=data_module) 78 | 79 | # load best model 80 | robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_aug_path) 81 | 82 | # set up fine-tuning 83 | robust_model.finetune = True 84 | robust_model.optimizer = "DEFINE OPTIMIZER FOR FINE-TUNING" 85 | 86 | # set up callback 87 | callback_topmodel = pl.callbacks.ModelCheckpoint( 88 | monitor='val_loss', 89 | save_top_k=1, 90 | dirpath=output_dir, 91 | filename=ckpt_finetune_path 92 | ) 93 | 94 | # set up PyTorch Lightning trainer 95 | trainer = pl.Trainer( 96 | accelerator="gpu", 97 |
devices=1, 98 | max_epochs=100, 99 | logger=None, 100 | callbacks=["ADD CALLBACKS", callback_topmodel] 101 | ) 102 | 103 | # fine-tune model 104 | trainer.fit(robust_model, datamodule=data_module) 105 | 106 | # load best fine-tuned model 107 | robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_finetune_path) 108 | ``` 109 | A minimal sketch of the placeholder objects used in the example above is given at the end of this README. 110 | 111 | #### Examples on Google Colab: 112 | 113 | DeepSTARR analysis: 114 | - Example analysis: https://colab.research.google.com/drive/1a2fiRPBd1xvoJf0WNiMUgTYiLTs1XETf?usp=sharing 115 | - Example load model and perform attribution analysis: https://colab.research.google.com/drive/11DVkhyX2VhhCSbCGkW3XjviMxTufBvZh?usp=sharing 116 | 117 | ChIP-seq analysis: 118 | - Example analysis: https://colab.research.google.com/drive/1GZ8v4Tq3LQMZI30qvdhF7ZW6Kf5GDyKX?usp=sharing 119 | 120 | # Citation 121 | 122 | If you find our work useful, please cite our paper. 123 | 124 | ```bibtex 125 | @article{lee2023evoaug, 126 | title={EvoAug: improving generalization and interpretability of genomic deep neural networks with evolution-inspired data augmentations}, 127 | author={Lee, Nicholas Keone and Tang, Ziqi and Toneyan, Shushan and Koo, Peter K}, 128 | journal={Genome Biology}, 129 | volume={24}, 130 | number={1}, 131 | pages={105}, 132 | year={2023}, 133 | publisher={Springer} 134 | } 135 | ``` 136 |
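The example above leaves `model`, `loss`, `optimizer_dict`, `data_module`, `output_dir`, and the checkpoint names as placeholders. The sketch below shows one way they might be filled in for a toy regression task; the `ToyCNN` network, the `RandomSeqDataModule`, and the output paths are illustrative assumptions, not part of the EvoAug API, and the optimizer is passed as a class following the `RobustModel` docstring (which accepts a class or a dict).

```python
# Hypothetical definitions for the placeholder objects used in the README example.
# The toy network, the random data module, and all names/paths are illustrative
# assumptions only -- they are not part of the EvoAug API.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import lightning.pytorch as pl


class ToyCNN(nn.Module):
    """Toy model mapping one-hot DNA of shape (batch, 4, length) to (batch, num_tasks)."""

    def __init__(self, num_tasks=2):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(4, 64, kernel_size=19, padding=9),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
        )
        self.fc = nn.Linear(64, num_tasks)

    def forward(self, x):
        return self.fc(self.conv(x))


class RandomSeqDataModule(pl.LightningDataModule):
    """Stand-in data module yielding (one-hot sequence, target) batches."""

    def __init__(self, n=512, length=200, num_tasks=2, batch_size=64):
        super().__init__()
        x = torch.eye(4)[torch.randint(0, 4, (n, length))].permute(0, 2, 1)  # (n, 4, length)
        y = torch.randn(n, num_tasks)
        n_train = int(0.875 * n)
        self.train_ds = TensorDataset(x[:n_train], y[:n_train])
        self.val_ds = TensorDataset(x[n_train:], y[n_train:])
        self.batch_size = batch_size

    def train_dataloader(self):
        return DataLoader(self.train_ds, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_ds, batch_size=self.batch_size)


model = ToyCNN()
loss = nn.MSELoss()
optimizer_dict = torch.optim.Adam  # per the RobustModel docstring, a class or a dict is accepted
data_module = RandomSeqDataModule()
output_dir = "outputs"
ckpt_aug_path = "path-to-aug-checkpoint.ckpt"
ckpt_finetune_path = "path-to-finetune-checkpoint.ckpt"
```

With these stand-ins, the pre-training and fine-tuning snippets above should run end to end on random data; for a real analysis, substitute an actual architecture and data module.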
bug is fixed. 115 | */ 116 | if (!jQuery.browser) { 117 | jQuery.uaMatch = function(ua) { 118 | ua = ua.toLowerCase(); 119 | 120 | var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || 121 | /(webkit)[ \/]([\w.]+)/.exec(ua) || 122 | /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || 123 | /(msie) ([\w.]+)/.exec(ua) || 124 | ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || 125 | []; 126 | 127 | return { 128 | browser: match[ 1 ] || "", 129 | version: match[ 2 ] || "0" 130 | }; 131 | }; 132 | jQuery.browser = {}; 133 | jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; 134 | } 135 | -------------------------------------------------------------------------------- /docs/source/_build/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Python Module Index — EvoAug 0.1 documentation 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 54 | 55 |
59 | 60 |
61 |
62 |
63 |
    64 |
  • »
  • 65 |
  • Python Module Index
  • 66 |
  • 67 |
  • 68 |
69 |
70 |
71 |
72 |
73 | 74 | 75 |

Python Module Index

76 | 77 |
78 | e 79 |
80 | 81 | 82 | 83 | 85 | 86 | 88 | 91 | 92 | 93 | 96 | 97 | 98 | 101 |
 
84 | e
89 | evoaug 90 |
    94 | evoaug.augment 95 |
    99 | evoaug.evoaug 100 |
102 | 103 | 104 |
105 |
106 |
107 | 108 |
109 | 110 |
111 |

© Copyright 2021, KooLab.

112 |
113 | 114 | Built with Sphinx using a 115 | theme 116 | provided by Read the Docs. 117 | 118 | 119 |
120 |
121 |
122 |
123 |
124 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /docs/source/_build/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Base JavaScript utilities for all Sphinx HTML documentation. 6 | * 7 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | "use strict"; 12 | 13 | const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ 14 | "TEXTAREA", 15 | "INPUT", 16 | "SELECT", 17 | "BUTTON", 18 | ]); 19 | 20 | const _ready = (callback) => { 21 | if (document.readyState !== "loading") { 22 | callback(); 23 | } else { 24 | document.addEventListener("DOMContentLoaded", callback); 25 | } 26 | }; 27 | 28 | /** 29 | * Small JavaScript module for the documentation. 30 | */ 31 | const Documentation = { 32 | init: () => { 33 | Documentation.initDomainIndexTable(); 34 | Documentation.initOnKeyListeners(); 35 | }, 36 | 37 | /** 38 | * i18n support 39 | */ 40 | TRANSLATIONS: {}, 41 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), 42 | LOCALE: "unknown", 43 | 44 | // gettext and ngettext don't access this so that the functions 45 | // can safely bound to a different name (_ = Documentation.gettext) 46 | gettext: (string) => { 47 | const translated = Documentation.TRANSLATIONS[string]; 48 | switch (typeof translated) { 49 | case "undefined": 50 | return string; // no translation 51 | case "string": 52 | return translated; // translation exists 53 | default: 54 | return translated[0]; // (singular, plural) translation tuple exists 55 | } 56 | }, 57 | 58 | ngettext: (singular, plural, n) => { 59 | const translated = Documentation.TRANSLATIONS[singular]; 60 | if (typeof translated !== "undefined") 61 | return translated[Documentation.PLURAL_EXPR(n)]; 62 | return n === 1 ? 
singular : plural; 63 | }, 64 | 65 | addTranslations: (catalog) => { 66 | Object.assign(Documentation.TRANSLATIONS, catalog.messages); 67 | Documentation.PLURAL_EXPR = new Function( 68 | "n", 69 | `return (${catalog.plural_expr})` 70 | ); 71 | Documentation.LOCALE = catalog.locale; 72 | }, 73 | 74 | /** 75 | * helper function to focus on search bar 76 | */ 77 | focusSearchBar: () => { 78 | document.querySelectorAll("input[name=q]")[0]?.focus(); 79 | }, 80 | 81 | /** 82 | * Initialise the domain index toggle buttons 83 | */ 84 | initDomainIndexTable: () => { 85 | const toggler = (el) => { 86 | const idNumber = el.id.substr(7); 87 | const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); 88 | if (el.src.substr(-9) === "minus.png") { 89 | el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; 90 | toggledRows.forEach((el) => (el.style.display = "none")); 91 | } else { 92 | el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; 93 | toggledRows.forEach((el) => (el.style.display = "")); 94 | } 95 | }; 96 | 97 | const togglerElements = document.querySelectorAll("img.toggler"); 98 | togglerElements.forEach((el) => 99 | el.addEventListener("click", (event) => toggler(event.currentTarget)) 100 | ); 101 | togglerElements.forEach((el) => (el.style.display = "")); 102 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); 103 | }, 104 | 105 | initOnKeyListeners: () => { 106 | // only install a listener if it is really needed 107 | if ( 108 | !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && 109 | !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS 110 | ) 111 | return; 112 | 113 | document.addEventListener("keydown", (event) => { 114 | // bail for input elements 115 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 116 | // bail with special keys 117 | if (event.altKey || event.ctrlKey || event.metaKey) return; 118 | 119 | if (!event.shiftKey) { 120 | switch (event.key) { 121 | case "ArrowLeft": 122 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 123 | 124 | const prevLink = document.querySelector('link[rel="prev"]'); 125 | if (prevLink && prevLink.href) { 126 | window.location.href = prevLink.href; 127 | event.preventDefault(); 128 | } 129 | break; 130 | case "ArrowRight": 131 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 132 | 133 | const nextLink = document.querySelector('link[rel="next"]'); 134 | if (nextLink && nextLink.href) { 135 | window.location.href = nextLink.href; 136 | event.preventDefault(); 137 | } 138 | break; 139 | } 140 | } 141 | 142 | // some keyboard layouts may need Shift to get / 143 | switch (event.key) { 144 | case "/": 145 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; 146 | Documentation.focusSearchBar(); 147 | event.preventDefault(); 148 | } 149 | }); 150 | }, 151 | }; 152 | 153 | // quick alias for translations 154 | const _ = Documentation.gettext; 155 | 156 | _ready(Documentation.init); 157 | -------------------------------------------------------------------------------- /docs/source/_build/autoapi/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | API Reference — EvoAug 0.1 documentation 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 51 | 52 |
56 | 57 |
58 |
59 |
60 | 67 |
68 |
69 |
70 |
71 | 72 |
73 |

API Reference

74 |

This page contains auto-generated API reference documentation 1.

75 |
76 | 83 |
84 |
85 |
1
86 |

Created with sphinx-autoapi

87 |
88 |
89 |
90 | 91 | 92 |
93 |
94 |
95 | 96 |
97 | 98 |
99 |

© Copyright 2021, KooLab.

100 |
101 | 102 | Built with Sphinx using a 103 | theme 104 | provided by Read the Docs. 105 | 106 | 107 |
108 |
109 |
110 |
111 |
112 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /docs/source/_build/_static/sphinx_highlight.js: -------------------------------------------------------------------------------- 1 | /* Highlighting utilities for Sphinx HTML documentation. */ 2 | "use strict"; 3 | 4 | const SPHINX_HIGHLIGHT_ENABLED = true 5 | 6 | /** 7 | * highlight a given string on a node by wrapping it in 8 | * span elements with the given class name. 9 | */ 10 | const _highlight = (node, addItems, text, className) => { 11 | if (node.nodeType === Node.TEXT_NODE) { 12 | const val = node.nodeValue; 13 | const parent = node.parentNode; 14 | const pos = val.toLowerCase().indexOf(text); 15 | if ( 16 | pos >= 0 && 17 | !parent.classList.contains(className) && 18 | !parent.classList.contains("nohighlight") 19 | ) { 20 | let span; 21 | 22 | const closestNode = parent.closest("body, svg, foreignObject"); 23 | const isInSVG = closestNode && closestNode.matches("svg"); 24 | if (isInSVG) { 25 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); 26 | } else { 27 | span = document.createElement("span"); 28 | span.classList.add(className); 29 | } 30 | 31 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 32 | parent.insertBefore( 33 | span, 34 | parent.insertBefore( 35 | document.createTextNode(val.substr(pos + text.length)), 36 | node.nextSibling 37 | ) 38 | ); 39 | node.nodeValue = val.substr(0, pos); 40 | 41 | if (isInSVG) { 42 | const rect = document.createElementNS( 43 | "http://www.w3.org/2000/svg", 44 | "rect" 45 | ); 46 | const bbox = parent.getBBox(); 47 | rect.x.baseVal.value = bbox.x; 48 | rect.y.baseVal.value = bbox.y; 49 | rect.width.baseVal.value = bbox.width; 50 | rect.height.baseVal.value = bbox.height; 51 | rect.setAttribute("class", className); 52 | addItems.push({ parent: parent, target: rect }); 53 | } 54 | } 55 | } else if (node.matches && !node.matches("button, select, textarea")) { 56 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); 57 | } 58 | }; 59 | const _highlightText = (thisNode, text, className) => { 60 | let addItems = []; 61 | _highlight(thisNode, addItems, text, className); 62 | addItems.forEach((obj) => 63 | obj.parent.insertAdjacentElement("beforebegin", obj.target) 64 | ); 65 | }; 66 | 67 | /** 68 | * Small JavaScript module for the documentation. 69 | */ 70 | const SphinxHighlight = { 71 | 72 | /** 73 | * highlight the search words provided in localstorage in the text 74 | */ 75 | highlightSearchWords: () => { 76 | if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight 77 | 78 | // get and clear terms from localstorage 79 | const url = new URL(window.location); 80 | const highlight = 81 | localStorage.getItem("sphinx_highlight_terms") 82 | || url.searchParams.get("highlight") 83 | || ""; 84 | localStorage.removeItem("sphinx_highlight_terms") 85 | url.searchParams.delete("highlight"); 86 | window.history.replaceState({}, "", url); 87 | 88 | // get individual terms from highlight string 89 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); 90 | if (terms.length === 0) return; // nothing to do 91 | 92 | // There should never be more than one element matching "div.body" 93 | const divBody = document.querySelectorAll("div.body"); 94 | const body = divBody.length ? 
divBody[0] : document.querySelector("body"); 95 | window.setTimeout(() => { 96 | terms.forEach((term) => _highlightText(body, term, "highlighted")); 97 | }, 10); 98 | 99 | const searchBox = document.getElementById("searchbox"); 100 | if (searchBox === null) return; 101 | searchBox.appendChild( 102 | document 103 | .createRange() 104 | .createContextualFragment( 105 | '" 109 | ) 110 | ); 111 | }, 112 | 113 | /** 114 | * helper function to hide the search marks again 115 | */ 116 | hideSearchWords: () => { 117 | document 118 | .querySelectorAll("#searchbox .highlight-link") 119 | .forEach((el) => el.remove()); 120 | document 121 | .querySelectorAll("span.highlighted") 122 | .forEach((el) => el.classList.remove("highlighted")); 123 | localStorage.removeItem("sphinx_highlight_terms") 124 | }, 125 | 126 | initEscapeListener: () => { 127 | // only install a listener if it is really needed 128 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; 129 | 130 | document.addEventListener("keydown", (event) => { 131 | // bail for input elements 132 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 133 | // bail with special keys 134 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; 135 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { 136 | SphinxHighlight.hideSearchWords(); 137 | event.preventDefault(); 138 | } 139 | }); 140 | }, 141 | }; 142 | 143 | _ready(SphinxHighlight.highlightSearchWords); 144 | _ready(SphinxHighlight.initEscapeListener); 145 | -------------------------------------------------------------------------------- /docs/source/_build/autoapi/evoaug/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | evoaug — EvoAug 0.1 documentation 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 51 | 52 |
56 | 57 |
58 |
59 |
60 | 67 |
68 |
69 |
70 |
71 | 72 |
73 |

evoaug

74 |

EvoAug is a PyTorch package to pretrain sequence-based deep learning models for 75 | regulatory genomics data with evolution-inspired data augmentations, followed by 76 | finetuning on the original, unperturbed sequence data.

77 |
78 |
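As a rough, self-contained sketch of this two-stage workflow (the toy data and toy linear model below are invented purely for illustration; only the evoaug and augment calls follow the package API):

import torch
import pytorch_lightning as pl   # the package source itself imports lightning.pytorch as pl
from torch.utils.data import DataLoader, TensorDataset
from evoaug import evoaug, augment

# toy one-hot sequences (N, A, L) and scalar targets, purely for illustration
x = torch.eye(4)[torch.randint(4, (128, 200))].permute(0, 2, 1)
y = torch.randn(128, 1)
loader = DataLoader(TensorDataset(x, y), batch_size=32)

net = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(4 * 200, 1))  # stand-in model
robust_model = evoaug.RobustModel(
    net,
    criterion=torch.nn.MSELoss(),
    optimizer=torch.optim.Adam(net.parameters(), lr=1e-3),
    augment_list=[augment.RandomRC(rc_prob=0.5), augment.RandomMutation(mut_frac=0.05)],
    max_augs_per_seq=2,
)

# stage 1: pretrain with evolution-inspired augmentations
pl.Trainer(max_epochs=1, logger=False, enable_checkpointing=False).fit(robust_model, loader)

# stage 2: finetune on the original, unperturbed sequences
robust_model.finetune_mode(optimizer=torch.optim.Adam(net.parameters(), lr=1e-4))
pl.Trainer(max_epochs=1, logger=False, enable_checkpointing=False).fit(robust_model, loader)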

Submodules

79 |
80 | 84 |
85 |
86 |
87 | 88 | 89 |
90 |
91 |
92 | 93 |
94 | 95 |
96 |

© Copyright 2021, KooLab.

97 |
98 | 99 | Built with Sphinx using a 100 | theme 101 | provided by Read the Docs. 102 | 103 | 104 |
105 |
106 |
107 |
108 |
109 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /docs/source/_build/_static/language_data.js: -------------------------------------------------------------------------------- 1 | /* 2 | * language_data.js 3 | * ~~~~~~~~~~~~~~~~ 4 | * 5 | * This script contains the language-specific data used by searchtools.js, 6 | * namely the list of stopwords, stemmer, scorer and splitter. 7 | * 8 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. 9 | * :license: BSD, see LICENSE for details. 10 | * 11 | */ 12 | 13 | var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; 14 | 15 | 16 | /* Non-minified version is copied as a separate JS file, is available */ 17 | 18 | /** 19 | * Porter Stemmer 20 | */ 21 | var Stemmer = function() { 22 | 23 | var step2list = { 24 | ational: 'ate', 25 | tional: 'tion', 26 | enci: 'ence', 27 | anci: 'ance', 28 | izer: 'ize', 29 | bli: 'ble', 30 | alli: 'al', 31 | entli: 'ent', 32 | eli: 'e', 33 | ousli: 'ous', 34 | ization: 'ize', 35 | ation: 'ate', 36 | ator: 'ate', 37 | alism: 'al', 38 | iveness: 'ive', 39 | fulness: 'ful', 40 | ousness: 'ous', 41 | aliti: 'al', 42 | iviti: 'ive', 43 | biliti: 'ble', 44 | logi: 'log' 45 | }; 46 | 47 | var step3list = { 48 | icate: 'ic', 49 | ative: '', 50 | alize: 'al', 51 | iciti: 'ic', 52 | ical: 'ic', 53 | ful: '', 54 | ness: '' 55 | }; 56 | 57 | var c = "[^aeiou]"; // consonant 58 | var v = "[aeiouy]"; // vowel 59 | var C = c + "[^aeiouy]*"; // consonant sequence 60 | var V = v + "[aeiou]*"; // vowel sequence 61 | 62 | var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 63 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 64 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 65 | var s_v = "^(" + C + ")?" 
+ v; // vowel in stem 66 | 67 | this.stemWord = function (w) { 68 | var stem; 69 | var suffix; 70 | var firstch; 71 | var origword = w; 72 | 73 | if (w.length < 3) 74 | return w; 75 | 76 | var re; 77 | var re2; 78 | var re3; 79 | var re4; 80 | 81 | firstch = w.substr(0,1); 82 | if (firstch == "y") 83 | w = firstch.toUpperCase() + w.substr(1); 84 | 85 | // Step 1a 86 | re = /^(.+?)(ss|i)es$/; 87 | re2 = /^(.+?)([^s])s$/; 88 | 89 | if (re.test(w)) 90 | w = w.replace(re,"$1$2"); 91 | else if (re2.test(w)) 92 | w = w.replace(re2,"$1$2"); 93 | 94 | // Step 1b 95 | re = /^(.+?)eed$/; 96 | re2 = /^(.+?)(ed|ing)$/; 97 | if (re.test(w)) { 98 | var fp = re.exec(w); 99 | re = new RegExp(mgr0); 100 | if (re.test(fp[1])) { 101 | re = /.$/; 102 | w = w.replace(re,""); 103 | } 104 | } 105 | else if (re2.test(w)) { 106 | var fp = re2.exec(w); 107 | stem = fp[1]; 108 | re2 = new RegExp(s_v); 109 | if (re2.test(stem)) { 110 | w = stem; 111 | re2 = /(at|bl|iz)$/; 112 | re3 = new RegExp("([^aeiouylsz])\\1$"); 113 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 114 | if (re2.test(w)) 115 | w = w + "e"; 116 | else if (re3.test(w)) { 117 | re = /.$/; 118 | w = w.replace(re,""); 119 | } 120 | else if (re4.test(w)) 121 | w = w + "e"; 122 | } 123 | } 124 | 125 | // Step 1c 126 | re = /^(.+?)y$/; 127 | if (re.test(w)) { 128 | var fp = re.exec(w); 129 | stem = fp[1]; 130 | re = new RegExp(s_v); 131 | if (re.test(stem)) 132 | w = stem + "i"; 133 | } 134 | 135 | // Step 2 136 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; 137 | if (re.test(w)) { 138 | var fp = re.exec(w); 139 | stem = fp[1]; 140 | suffix = fp[2]; 141 | re = new RegExp(mgr0); 142 | if (re.test(stem)) 143 | w = stem + step2list[suffix]; 144 | } 145 | 146 | // Step 3 147 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; 148 | if (re.test(w)) { 149 | var fp = re.exec(w); 150 | stem = fp[1]; 151 | suffix = fp[2]; 152 | re = new RegExp(mgr0); 153 | if (re.test(stem)) 154 | w = stem + step3list[suffix]; 155 | } 156 | 157 | // Step 4 158 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; 159 | re2 = /^(.+?)(s|t)(ion)$/; 160 | if (re.test(w)) { 161 | var fp = re.exec(w); 162 | stem = fp[1]; 163 | re = new RegExp(mgr1); 164 | if (re.test(stem)) 165 | w = stem; 166 | } 167 | else if (re2.test(w)) { 168 | var fp = re2.exec(w); 169 | stem = fp[1] + fp[2]; 170 | re2 = new RegExp(mgr1); 171 | if (re2.test(stem)) 172 | w = stem; 173 | } 174 | 175 | // Step 5 176 | re = /^(.+?)e$/; 177 | if (re.test(w)) { 178 | var fp = re.exec(w); 179 | stem = fp[1]; 180 | re = new RegExp(mgr1); 181 | re2 = new RegExp(meq1); 182 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 183 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) 184 | w = stem; 185 | } 186 | re = /ll$/; 187 | re2 = new RegExp(mgr1); 188 | if (re.test(w) && re2.test(w)) { 189 | re = /.$/; 190 | w = w.replace(re,""); 191 | } 192 | 193 | // and turn initial Y back to y 194 | if (firstch == "y") 195 | w = firstch.toLowerCase() + w.substr(1); 196 | return w; 197 | } 198 | } 199 | 200 | -------------------------------------------------------------------------------- /docs/source/_build/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Welcome to EvoAug’s documentation! — EvoAug 0.1 documentation 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 52 | 53 |
57 | 58 |
59 |
60 |
61 |
    62 |
  • »
  • 63 |
  • Welcome to EvoAug’s documentation!
  • 64 |
  • 65 | View page source 66 |
  • 67 |
68 |
69 |
70 |
71 |
72 | 73 |
74 |

Welcome to EvoAug’s documentation!

75 |

EvoAug is a Python library to train PyTorch 76 | models on regulatory genomics data with evolution-inspired 77 | data augmentations.

78 |

Check out the User Guide for further information, including 79 | how to install the project.

80 |
81 |

Note

82 |

This project is under active development. Source code can be found here.

83 |
84 |
85 |

Contents

86 |
87 | 99 |
100 |
101 |
102 | 103 | 104 |
105 |
106 |
109 | 110 |
111 | 112 |
113 |

© Copyright 2021, KooLab.

114 |
115 | 116 | Built with Sphinx using a 117 | theme 118 | provided by Read the Docs. 119 | 120 | 121 |
122 |
123 |
124 |
125 |
126 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /docs/source/_build/_sources/autoapi/evoaug/augment/index.rst.txt: -------------------------------------------------------------------------------- 1 | :py:mod:`evoaug.augment` 2 | ======================== 3 | 4 | .. py:module:: evoaug.augment 5 | 6 | .. autoapi-nested-parse:: 7 | 8 | Library of data augmentations for genomic sequence data. 9 | 10 | To contribute a custom augmentation, use the following syntax: 11 | 12 | .. code-block:: python 13 | 14 | class CustomAugmentation(AugmentBase): 15 | def __init__(self, param1, param2): 16 | self.param1 = param1 17 | self.param2 = param2 18 | 19 | def __call__(self, x: torch.Tensor) -> torch.Tensor: 20 | # Perform augmentation 21 | return x_aug 22 | 23 | 24 | 25 | Module Contents 26 | --------------- 27 | 28 | Classes 29 | ~~~~~~~ 30 | 31 | .. autoapisummary:: 32 | 33 | evoaug.augment.AugmentBase 34 | evoaug.augment.RandomDeletion 35 | evoaug.augment.RandomInsertion 36 | evoaug.augment.RandomTranslocation 37 | evoaug.augment.RandomInversion 38 | evoaug.augment.RandomMutation 39 | evoaug.augment.RandomRC 40 | evoaug.augment.RandomNoise 41 | 42 | 43 | 44 | 45 | .. py:class:: AugmentBase 46 | 47 | Base class for EvoAug augmentations for genomic sequences. 48 | 49 | .. py:method:: __call__(x) 50 | :abstractmethod: 51 | 52 | Return an augmented version of `x`. 53 | 54 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 55 | :type x: torch.Tensor 56 | 57 | :returns: Batch of one-hot sequences with random augmentation applied. 58 | :rtype: torch.Tensor 59 | 60 | 61 | 62 | .. py:class:: RandomDeletion(delete_min=0, delete_max=20) 63 | 64 | Bases: :py:obj:`AugmentBase` 65 | 66 | Randomly deletes a contiguous stretch of nucleotides from sequences in a training 67 | batch according to a random number between a user-defined delete_min and delete_max. 68 | A different deletion is applied to each sequence. 69 | 70 | :param delete_min: Minimum size for random deletion (defaults to 0). 71 | :type delete_min: int, optional 72 | :param delete_max: Maximum size for random deletion (defaults to 20). 73 | :type delete_max: int, optional 74 | 75 | .. py:method:: __call__(x) 76 | 77 | Randomly delete segments in a set of one-hot DNA sequences. 78 | 79 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 80 | :type x: torch.Tensor 81 | 82 | :returns: Sequences with randomly deleted segments (padded to correct shape 83 | with random DNA) 84 | :rtype: torch.Tensor 85 | 86 | 87 | 88 | .. py:class:: RandomInsertion(insert_min=0, insert_max=20) 89 | 90 | Bases: :py:obj:`AugmentBase` 91 | 92 | Randomly inserts a contiguous stretch of nucleotides from sequences in a training 93 | batch according to a random number between a user-defined insert_min and insert_max. 94 | A different insertions is applied to each sequence. Each sequence is padded with random 95 | DNA to ensure same shapes. 96 | 97 | :param insert_min: Minimum size for random insertion, defaults to 0 98 | :type insert_min: int, optional 99 | :param insert_max: Maximum size for random insertion, defaults to 20 100 | :type insert_max: int, optional 101 | 102 | .. py:method:: __call__(x) 103 | 104 | Randomly inserts segments of random DNA to a set of DNA sequences. 105 | 106 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 107 | :type x: torch.Tensor 108 | 109 | :returns: Sequences with randomly inserts segments of random DNA. 
All sequences 110 | are padded with random DNA to ensure same shape. 111 | :rtype: torch.Tensor 112 | 113 | 114 | 115 | .. py:class:: RandomTranslocation(shift_min=0, shift_max=20) 116 | 117 | Bases: :py:obj:`AugmentBase` 118 | 119 | Randomly cuts sequence in two pieces and shifts the order for each in a training 120 | batch. This is implemented with a roll transformation with a user-defined shift_min 121 | and shift_max. A different roll (positive or negative) is applied to each sequence. 122 | Each sequence is padded with random DNA to ensure same shapes. 123 | 124 | :param shift_min: Minimum size for random shift, defaults to 0. 125 | :type shift_min: int, optional 126 | :param shift_max: Maximum size for random shift, defaults to 20. 127 | :type shift_max: int, optional 128 | 129 | .. py:method:: __call__(x) 130 | 131 | Randomly shifts sequences in a batch, x. 132 | 133 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 134 | :type x: torch.Tensor 135 | 136 | :returns: Sequences with random translocations. 137 | :rtype: torch.Tensor 138 | 139 | 140 | 141 | .. py:class:: RandomInversion(invert_min=0, invert_max=20) 142 | 143 | Bases: :py:obj:`AugmentBase` 144 | 145 | Randomly inverts a contiguous stretch of nucleotides from sequences in a training 146 | batch according to a user-defined invert_min and invert_max. A different insertions 147 | is applied to each sequence. Each sequence is padded with random DNA to ensure same 148 | shapes. 149 | 150 | :param invert_min: Minimum size for random insertion, defaults to 0. 151 | :type invert_min: int, optional 152 | :param invert_max: Maximum size for random insertion, defaults to 20. 153 | :type invert_max: int, optional 154 | 155 | .. py:method:: __call__(x) 156 | 157 | Randomly inverts segments of random DNA to a set of one-hot DNA sequences. 158 | 159 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 160 | :type x: torch.Tensor 161 | 162 | :returns: Sequences with randomly inverted segments of random DNA. 163 | :rtype: torch.Tensor 164 | 165 | 166 | 167 | .. py:class:: RandomMutation(mutate_frac=0.05) 168 | 169 | Bases: :py:obj:`AugmentBase` 170 | 171 | Randomly mutates sequences in a training batch according to a user-defined 172 | mutate_frac. A different set of mutations is applied to each sequence. 173 | 174 | :param mutate_frac: Probability of mutation for each nucleotide, defaults to 0.05. 175 | :type mutate_frac: float, optional 176 | 177 | .. py:method:: __call__(x) 178 | 179 | Randomly introduces mutations to a set of one-hot DNA sequences. 180 | 181 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 182 | :type x: torch.Tensor 183 | 184 | :returns: Sequences with randomly mutated DNA. 185 | :rtype: torch.Tensor 186 | 187 | 188 | 189 | .. py:class:: RandomRC(rc_prob=0.5) 190 | 191 | Bases: :py:obj:`AugmentBase` 192 | 193 | Randomly applies a reverse-complement transformation to each sequence in a training 194 | batch according to a user-defined probability, rc_prob. This is applied to each sequence 195 | independently. 196 | 197 | :param rc_prob: Probability to apply a reverse-complement transformation, defaults to 0.5. 198 | :type rc_prob: float, optional 199 | 200 | .. py:method:: __call__(x) 201 | 202 | Randomly transforms sequences in a batch with a reverse-complement transformation. 203 | 204 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 205 | :type x: torch.Tensor 206 | 207 | :returns: Sequences with random reverse-complements applied. 
208 | :rtype: torch.Tensor 209 | 210 | 211 | 212 | .. py:class:: RandomNoise(noise_mean=0.0, noise_std=0.2) 213 | 214 | Bases: :py:obj:`AugmentBase` 215 | 216 | Randomly adds Gaussian noise to a batch of sequences according to a user-defined 217 | noise_mean and noise_std. A different set of noise is applied to each sequence. 218 | 219 | :param noise_mean: Mean of the Gaussian noise, defaults to 0.0. 220 | :type noise_mean: float, optional 221 | :param noise_std: Standard deviation of the Gaussian noise, defaults to 0.2. 222 | :type noise_std: float, optional 223 | 224 | .. py:method:: __call__(x) 225 | 226 | Randomly adds Gaussian noise to a set of one-hot DNA sequences. 227 | 228 | :param x: Batch of one-hot sequences (shape: (N, A, L)). 229 | :type x: torch.Tensor 230 | 231 | :returns: Sequences with random noise. 232 | :rtype: torch.Tensor 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /evoaug/evoaug.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model (implemented in PyTorch Lightning) demonstrating how to use augmentations 3 | during training. 4 | """ 5 | 6 | import torch 7 | import lightning.pytorch as pl 8 | import numpy as np 9 | 10 | 11 | class RobustModel(pl.LightningModule): 12 | """PyTorch Lightning module to specify how augmentation should be applied to a model. 13 | 14 | Parameters 15 | ---------- 16 | model : torch.nn.Module 17 | PyTorch model. 18 | criterion : callable 19 | PyTorch loss function. 20 | optimizer : torch.optim.Optimizer or dict 21 | PyTorch optimizer as a class or dictionary. 22 | augment_list : list 23 | List of data augmentations, each a callable class from augment.py. 24 | Default is empty list -- no augmentations. 25 | max_augs_per_seq : int 26 | Maximum number of augmentations to apply to each sequence. Value is capped at the number of augmentations in augment_list. 27 | hard_aug : bool 28 | Flag to set a hard number of augmentations, otherwise the number of augmentations is set randomly up to max_augs_per_seq, default is True. 29 | finetune : bool 30 | Flag to turn off augmentations during training, default is False. 31 | inference_aug : bool 32 | Flag to turn on augmentations during inference, default is False.
33 | """ 34 | 35 | def __init__(self, model, criterion, optimizer, augment_list=[], max_augs_per_seq=0, hard_aug=True, finetune=False, inference_aug=False): 36 | super().__init__() 37 | self.model = model 38 | self.criterion = criterion 39 | self.optimizer = optimizer 40 | self.augment_list = augment_list 41 | self.max_augs_per_seq = np.minimum(max_augs_per_seq, len(augment_list)) 42 | self.hard_aug = hard_aug 43 | self.inference_aug = inference_aug 44 | self.optimizer = optimizer 45 | self.max_num_aug = len(augment_list) 46 | self.insert_max = augment_max_len(augment_list) 47 | self.finetune = finetune 48 | 49 | def forward(self, x): 50 | """Standard forward pass.""" 51 | y_hat = self.model(x) 52 | return y_hat 53 | 54 | def configure_optimizers(self): 55 | """Standard optimizer configuration.""" 56 | return self.optimizer 57 | 58 | def training_step(self, batch, batch_idx): 59 | """Training step with augmentations.""" 60 | x, y = batch 61 | if self.finetune: # if finetune, no augmentations 62 | if self.insert_max: # if insert_max is larger than 0, then pad each sequence with random DNA 63 | x = self._pad_end(x) 64 | else: 65 | x = self._apply_augment(x) 66 | y_hat = self(x) 67 | loss = self.criterion(y_hat, y) 68 | self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True) 69 | return loss 70 | 71 | 72 | def validation_step(self, batch, batch_idx): 73 | """Validation step without (or with) augmentations.""" 74 | x, y = batch 75 | if self.inference_aug: # if inference_aug, then apply augmentations during inference 76 | x = self._apply_augment(x) 77 | else: 78 | if self.insert_max: # if insert_max is larger than 0, then pad each sequence with random DNA 79 | x = self._pad_end(x) 80 | y_hat = self(x) 81 | loss = self.criterion(y_hat, y) 82 | self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True) 83 | 84 | 85 | def test_step(self, batch, batch_idx): 86 | """Test step without (or with) augmentations.""" 87 | x, y = batch 88 | if self.inference_aug: # if inference_aug, then apply augmentations during inference 89 | x = self._apply_augment(x) 90 | else: 91 | if self.insert_max: # if insert_max is larger than 0, then pad each sequence with random DNA 92 | x = self._pad_end(x) 93 | y_hat = self(x) 94 | loss = self.criterion(y_hat, y) 95 | self.log('test_loss', loss, on_step=False, on_epoch=True, prog_bar=True) 96 | 97 | 98 | def predict_step(self, batch, batch_idx): 99 | """Prediction step without (or with) augmentations.""" 100 | x = batch 101 | if self.inference_aug: # if inference_aug, then apply augmentations during inference 102 | x = self._apply_augment(x) 103 | else: 104 | if self.insert_max: # if insert_max is larger than 0, then pad each sequence with random DNA 105 | x = self._pad_end(x) 106 | return self(x) 107 | 108 | 109 | def _sample_aug_combos(self, batch_size): 110 | """Set the number of augmentations and randomly select augmentations to apply 111 | to each sequence. 
112 | """ 113 | # determine the number of augmentations per sequence 114 | if self.hard_aug: 115 | batch_num_aug = self.max_augs_per_seq * np.ones((batch_size,), dtype=int) 116 | else: 117 | batch_num_aug = np.random.randint(1, self.max_augs_per_seq + 1, (batch_size,)) 118 | 119 | # randomly choose which subset of augmentations from augment_list 120 | aug_combos = [ list(sorted(np.random.choice(self.max_num_aug, sample, replace=False))) for sample in batch_num_aug ] 121 | return aug_combos 122 | 123 | 124 | def _apply_augment(self, x): 125 | """Apply augmentations to each sequence in batch, x.""" 126 | # number of augmentations per sequence 127 | aug_combos = self._sample_aug_combos(x.shape[0]) 128 | 129 | # apply augmentation combination to sequences 130 | x_new = [] 131 | for aug_indices, seq in zip(aug_combos, x): 132 | seq = torch.unsqueeze(seq, dim=0) 133 | insert_status = True # status to see if random DNA padding is needed 134 | for aug_index in aug_indices: 135 | seq = self.augment_list[aug_index](seq) 136 | if hasattr(self.augment_list[aug_index], 'insert_max'): 137 | insert_status = False 138 | if insert_status: 139 | if self.insert_max: 140 | seq = self._pad_end(seq) 141 | x_new.append(seq) 142 | return torch.cat(x_new) 143 | 144 | 145 | def _pad_end(self, x): 146 | """Add random DNA padding of length insert_max to the end of each sequence in batch.""" 147 | N, A, L = x.shape 148 | a = torch.eye(A) 149 | p = torch.tensor([1/A for _ in range(A)]) 150 | padding = torch.stack([a[p.multinomial(self.insert_max, replacement=True)].transpose(0,1) for _ in range(N)]).to(x.device) 151 | x_padded = torch.cat( [x, padding.to(x.device)], dim=2 ) 152 | return x_padded 153 | 154 | 155 | def finetune_mode(self, optimizer=None): 156 | """Turn on finetune flag -- no augmentations during training.""" 157 | self.finetune = True 158 | if optimizer != None: 159 | self.optimizer = optimizer 160 | 161 | 162 | 163 | 164 | def load_model_from_checkpoint(model, checkpoint_path): 165 | """Load PyTorch lightning model from checkpoint. 166 | 167 | Parameters 168 | ---------- 169 | model : RobustModel 170 | RobustModel instance. 171 | checkpoint_path : str 172 | path to checkpoint of model weights 173 | 174 | Returns 175 | ------- 176 | RobustModel 177 | Object with weights and config loaded from checkpoint. 178 | """ 179 | return model.load_from_checkpoint(checkpoint_path, 180 | model=model.model, 181 | criterion=model.criterion, 182 | optimizer=model.optimizer, 183 | augment_list=model.augment_list, 184 | max_augs_per_seq=model.max_augs_per_seq, 185 | hard_aug=model.hard_aug, 186 | finetune=model.finetune, 187 | inference_aug=model.inference_aug 188 | ) 189 | 190 | 191 | #------------------------------------------------------------------------ 192 | # Helper function 193 | #------------------------------------------------------------------------ 194 | 195 | 196 | def augment_max_len(augment_list): 197 | """Determine whether insertions are applied to determine the insert_max, 198 | which will be applied to pad other sequences with random DNA. 199 | 200 | Parameters 201 | ---------- 202 | augment_list : list 203 | List of augmentations. 204 | 205 | Returns 206 | ------- 207 | int 208 | Value for insert max. 
209 | """ 210 | insert_max = 0 211 | for augment in augment_list: 212 | if hasattr(augment, 'insert_max'): 213 | insert_max = augment.insert_max 214 | return insert_max 215 | -------------------------------------------------------------------------------- /docs/source/_build/searchindex.js: -------------------------------------------------------------------------------- 1 | Search.setIndex({"docnames": ["autoapi/evoaug/augment/index", "autoapi/evoaug/evoaug/index", "autoapi/evoaug/index", "autoapi/index", "index", "usage"], "filenames": ["autoapi/evoaug/augment/index.rst", "autoapi/evoaug/evoaug/index.rst", "autoapi/evoaug/index.rst", "autoapi/index.rst", "index.rst", "usage.rst"], "titles": ["evoaug.augment", "evoaug.evoaug", "evoaug", "API Reference", "Welcome to EvoAug\u2019s documentation!", "User Guide"], "terms": {"librari": [0, 4], "data": [0, 1, 2, 4], "genom": [0, 2, 4], "sequenc": [0, 1, 2, 5], "To": [0, 5], "contribut": 0, "custom": 0, "us": [0, 1, 5], "follow": [0, 2], "syntax": 0, "customaugment": 0, "augmentbas": 0, "def": 0, "__init__": 0, "self": 0, "param1": 0, "param2": 0, "__call__": 0, "x": [0, 1], "torch": [0, 1], "tensor": 0, "perform": 0, "return": [0, 1], "x_aug": 0, "base": [0, 1, 2], "abstract": 0, "an": 0, "version": 0, "paramet": [0, 1], "batch": [0, 1], "one": 0, "hot": 0, "shape": 0, "n": 0, "A": 0, "l": 0, "random": [0, 1], "appli": [0, 1], "type": [0, 1], "randomdelet": [0, 5], "delete_min": [0, 5], "0": [0, 1, 5], "delete_max": [0, 5], "30": [], "randomli": [0, 1, 5], "delet": 0, "contigu": 0, "stretch": 0, "nucleotid": 0, "from": [0, 1, 5], "train": [0, 1, 4, 5], "accord": 0, "number": [0, 1, 5], "between": 0, "user": [0, 4], "defin": [0, 5], "differ": 0, "i": [0, 1, 2, 4], "each": [0, 1], "int": [0, 1], "option": 0, "minimum": 0, "size": 0, "default": [0, 1], "maximum": [0, 1, 5], "segment": 0, "set": [0, 1, 5], "dna": [0, 1], "pad": [0, 1], "correct": 0, "randominsert": [0, 5], "insert_min": [0, 5], "insert_max": [0, 1, 5], "insert": [0, 1], "ensur": 0, "same": 0, "all": 0, "ar": [0, 1], "randomtransloc": [0, 5], "shift_min": [0, 5], "shift_max": [0, 5], "cut": 0, "two": 0, "piec": 0, "shift": 0, "order": 0, "thi": [0, 3, 4], "implement": [0, 1], "roll": 0, "transform": 0, "posit": 0, "neg": 0, "transloc": 0, "randominvers": 0, "invert_min": 0, "invert_max": 0, "invert": 0, "randommut": [0, 5], "mutate_frac": 0, "1": [3, 5], "mutat": 0, "float": 0, "probabl": 0, "introduc": 0, "randomrc": [0, 5], "rc_prob": [0, 5], "5": [0, 5], "revers": 0, "complement": 0, "independ": 0, "randomnois": [0, 5], "noise_mean": [0, 5], "noise_std": [0, 5], "2": [0, 5], "add": [0, 1, 5], "gaussian": 0, "nois": 0, "bia": [], "mean": 0, "standard": [0, 1], "deviat": 0, "model": [1, 2, 4, 5], "pytorch": [1, 2, 4, 5], "lightn": [1, 5], "demonstr": 1, "how": [1, 4], "augment": [1, 2, 3, 4, 5], "dure": [1, 5], "robustmodel": [1, 5], "criterion": [1, 5], "optim": [1, 5], "augment_list": [1, 5], "none": [1, 5], "max_augs_per_seq": [1, 5], "hard_aug": [1, 5], "true": [1, 5], "finetun": [1, 2, 5], "fals": [1, 5], "inference_aug": [1, 5], "pytorch_lightn": [1, 5], "core": 1, "lightningmodul": 1, "specifi": 1, "should": 1, "nn": 1, "callabl": 1, "loss": [1, 5], "dict": [1, 5], "dictionari": 1, "list": 1, "py": 1, "empti": 1, "valu": 1, "superced": 1, "bool": 1, "flag": 1, "hard": 1, "otherwis": [1, 5], "up": [1, 5], "turn": 1, "off": 1, "infer": [1, 5], "forward": 1, "pass": 1, "configure_optim": 1, "configur": 1, "training_step": 1, "batch_idx": 1, "step": 1, "validation_step": 1, 
"valid": 1, "without": [1, 5], "test_step": 1, "test": 1, "predict_step": 1, "predict": 1, "_sample_aug_combo": 1, "batch_siz": 1, "select": 1, "_apply_aug": 1, "_pad_end": 1, "length": 1, "end": 1, "load_model_from_checkpoint": [1, 5], "checkpoint_path": 1, "load": [1, 5], "checkpoint": [1, 5], "instanc": 1, "str": 1, "path": [1, 5], "weight": 1, "object": 1, "config": 1, "augment_max_len": 1, "determin": 1, "whether": 1, "which": 1, "other": 1, "max": [1, 5], "packag": 2, "pretrain": 2, "deep": 2, "learn": 2, "regulatori": [2, 4], "evolut": [2, 4], "inspir": [2, 4], "origin": 2, "unperturb": 2, "page": 3, "contain": 3, "auto": 3, "gener": 3, "document": 3, "evoaug": [3, 5], "creat": 3, "sphinx": 3, "autoapi": 3, "python": 4, "check": 4, "out": 4, "guid": 4, "further": 4, "inform": 4, "includ": 4, "instal": 4, "project": 4, "under": 4, "activ": 4, "develop": 4, "exampl": 4, "api": 4, "refer": 4, "first": 5, "pip": 5, "import": 5, "pl": 5, "optimizer_dict": 5, "OR": 5, "ckpt_aug_path": 5, "aug": 5, "ckpt": 5, "ckpt_finetune_path": 5, "20": [0, 5], "mut_frac": 5, "05": [0, 5], "robust_model": 5, "per": 5, "sampl": 5, "keep": 5, "time": 5, "callback": 5, "callback_topmodel": 5, "modelcheckpoint": 5, "monitor": 5, "val_loss": 5, "save_top_k": 5, "dirpath": 5, "output_dir": 5, "filenam": 5, "trainer": 5, "gpu": 5, "max_epoch": 5, "100": 5, "auto_select_gpu": 5, "logger": 5, "pre": 5, "fit": 5, "datamodul": 5, "data_modul": 5, "best": 5, "fine": 5, "tune": 5, "sourc": 4, "code": 4, "can": 4, "found": 4, "here": 4, "block": [], "choic": 5, "finetune_mod": 1, "augment_mod": [], "googl": 4, "colab": 4, "deepstarr": 5, "analysi": 5, "http": 5, "research": 5, "com": 5, "drive": 5, "1a2firpbd1xvojf0wnimugtyilts1xetf": 5, "usp": 5, "share": 5, "chip": 5, "seq": 5, "1gz8v4tq3lqmzi30qvdhf7zw6kf5gdykx": 5}, "objects": {"": [[2, 0, 0, "-", "evoaug"]], "evoaug": [[0, 0, 0, "-", "augment"], [1, 0, 0, "-", "evoaug"]], "evoaug.augment": [[0, 1, 1, "", "AugmentBase"], [0, 1, 1, "", "RandomDeletion"], [0, 1, 1, "", "RandomInsertion"], [0, 1, 1, "", "RandomInversion"], [0, 1, 1, "", "RandomMutation"], [0, 1, 1, "", "RandomNoise"], [0, 1, 1, "", "RandomRC"], [0, 1, 1, "", "RandomTranslocation"]], "evoaug.augment.AugmentBase": [[0, 2, 1, "", "__call__"]], "evoaug.augment.RandomDeletion": [[0, 2, 1, "", "__call__"]], "evoaug.augment.RandomInsertion": [[0, 2, 1, "", "__call__"]], "evoaug.augment.RandomInversion": [[0, 2, 1, "", "__call__"]], "evoaug.augment.RandomMutation": [[0, 2, 1, "", "__call__"]], "evoaug.augment.RandomNoise": [[0, 2, 1, "", "__call__"]], "evoaug.augment.RandomRC": [[0, 2, 1, "", "__call__"]], "evoaug.augment.RandomTranslocation": [[0, 2, 1, "", "__call__"]], "evoaug.evoaug": [[1, 1, 1, "", "RobustModel"], [1, 3, 1, "", "augment_max_len"], [1, 3, 1, "", "load_model_from_checkpoint"]], "evoaug.evoaug.RobustModel": [[1, 2, 1, "", "_apply_augment"], [1, 2, 1, "", "_pad_end"], [1, 2, 1, "", "_sample_aug_combos"], [1, 2, 1, "", "configure_optimizers"], [1, 2, 1, "", "finetune_mode"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "predict_step"], [1, 2, 1, "", "test_step"], [1, 2, 1, "", "training_step"], [1, 2, 1, "", "validation_step"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"]}, "titleterms": {"evoaug": [0, 1, 2, 4], "augment": 0, "modul": [0, 1], "content": [0, 1, 4], "class": [0, 1], 
"function": 1, "submodul": 2, "api": 3, "refer": 3, "welcom": 4, "": 4, "document": 4, "user": 5, "guid": 5, "instal": 5, "exampl": 5, "googl": 5, "colab": 5}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx": 57}, "alltitles": {"Welcome to EvoAug\u2019s documentation!": [[4, "welcome-to-evoaug-s-documentation"]], "Contents": [[4, "contents"]], "evoaug.augment": [[0, "module-evoaug.augment"]], "Module Contents": [[0, "module-contents"], [1, "module-contents"]], "Classes": [[0, "classes"], [1, "classes"]], "evoaug.evoaug": [[1, "module-evoaug.evoaug"]], "Functions": [[1, "functions"]], "evoaug": [[2, "module-evoaug"]], "Submodules": [[2, "submodules"]], "API Reference": [[3, "api-reference"]], "User Guide": [[5, "user-guide"]], "Installation": [[5, "installation"]], "Example": [[5, "example"]], "Examples on Google Colab": [[5, "examples-on-google-colab"]]}, "indexentries": {"augmentbase (class in evoaug.augment)": [[0, "evoaug.augment.AugmentBase"]], "randomdeletion (class in evoaug.augment)": [[0, "evoaug.augment.RandomDeletion"]], "randominsertion (class in evoaug.augment)": [[0, "evoaug.augment.RandomInsertion"]], "randominversion (class in evoaug.augment)": [[0, "evoaug.augment.RandomInversion"]], "randommutation (class in evoaug.augment)": [[0, "evoaug.augment.RandomMutation"]], "randomnoise (class in evoaug.augment)": [[0, "evoaug.augment.RandomNoise"]], "randomrc (class in evoaug.augment)": [[0, "evoaug.augment.RandomRC"]], "randomtranslocation (class in evoaug.augment)": [[0, "evoaug.augment.RandomTranslocation"]], "__call__() (evoaug.augment.augmentbase method)": [[0, "evoaug.augment.AugmentBase.__call__"]], "__call__() (evoaug.augment.randomdeletion method)": [[0, "evoaug.augment.RandomDeletion.__call__"]], "__call__() (evoaug.augment.randominsertion method)": [[0, "evoaug.augment.RandomInsertion.__call__"]], "__call__() (evoaug.augment.randominversion method)": [[0, "evoaug.augment.RandomInversion.__call__"]], "__call__() (evoaug.augment.randommutation method)": [[0, "evoaug.augment.RandomMutation.__call__"]], "__call__() (evoaug.augment.randomnoise method)": [[0, "evoaug.augment.RandomNoise.__call__"]], "__call__() (evoaug.augment.randomrc method)": [[0, "evoaug.augment.RandomRC.__call__"]], "__call__() (evoaug.augment.randomtranslocation method)": [[0, "evoaug.augment.RandomTranslocation.__call__"]], "evoaug.augment": [[0, "module-evoaug.augment"]], "module": [[0, "module-evoaug.augment"], [1, "module-evoaug.evoaug"], [2, "module-evoaug"]], "robustmodel (class in evoaug.evoaug)": [[1, "evoaug.evoaug.RobustModel"]], "_apply_augment() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel._apply_augment"]], "_pad_end() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel._pad_end"]], "_sample_aug_combos() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel._sample_aug_combos"]], "augment_max_len() (in module evoaug.evoaug)": [[1, "evoaug.evoaug.augment_max_len"]], "configure_optimizers() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel.configure_optimizers"]], "evoaug.evoaug": [[1, "module-evoaug.evoaug"]], "finetune_mode() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel.finetune_mode"]], "forward() 
(evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel.forward"]], "load_model_from_checkpoint() (in module evoaug.evoaug)": [[1, "evoaug.evoaug.load_model_from_checkpoint"]], "predict_step() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel.predict_step"]], "test_step() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel.test_step"]], "training_step() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel.training_step"]], "validation_step() (evoaug.evoaug.robustmodel method)": [[1, "evoaug.evoaug.RobustModel.validation_step"]], "evoaug": [[2, "module-evoaug"]]}}) -------------------------------------------------------------------------------- /docs/source/_build/genindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Index — EvoAug 0.1 documentation 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 51 | 52 |
56 | 57 |
58 |
59 |
60 |
    61 |
  • »
  • 62 |
  • Index
  • 63 |
  • 64 |
  • 65 |
66 |
67 |
68 |
69 |
70 | 71 | 72 |

Index

73 | 74 |
75 | _ 76 | | A 77 | | C 78 | | E 79 | | F 80 | | L 81 | | M 82 | | P 83 | | R 84 | | T 85 | | V 86 | 87 |
88 |

_

89 | 90 | 110 | 118 |
119 | 120 |

A

121 | 122 | 126 | 130 |
131 | 132 |

C

133 | 134 | 138 |
139 | 140 |

E

141 | 142 | 158 | 167 |
    143 |
  • 144 | evoaug 145 | 146 |
  • 150 |
  • 151 | evoaug.augment 152 | 153 |
  • 157 |
    159 |
  • 160 | evoaug.evoaug 161 | 162 |
  • 166 |
168 | 169 |

F

170 | 171 | 175 | 179 |
180 | 181 |

L

182 | 183 | 187 |
188 | 189 |

M

190 | 191 | 204 |
205 | 206 |

P

207 | 208 | 212 |
213 | 214 |

R

215 | 216 | 226 | 236 |
237 | 238 |

T

239 | 240 | 244 | 248 |
249 | 250 |

V

251 | 252 | 256 |
257 | 258 | 259 | 260 |
261 |
262 |
263 | 264 |
265 | 266 |
267 |

© Copyright 2021, KooLab.

268 |
269 | 270 | Built with Sphinx using a 271 | theme 272 | provided by Read the Docs. 273 | 274 | 275 |
276 |
277 |
278 |
279 |
280 | 285 | 286 | 287 | -------------------------------------------------------------------------------- /evoaug/augment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Library of data augmentations for genomic sequence data. 3 | 4 | To contribute a custom augmentation, use the following syntax: 5 | 6 | .. code-block:: python 7 | 8 | class CustomAugmentation(AugmentBase): 9 | def __init__(self, param1, param2): 10 | self.param1 = param1 11 | self.param2 = param2 12 | 13 | def __call__(self, x: torch.Tensor) -> torch.Tensor: 14 | # Perform augmentation 15 | return x_aug 16 | 17 | """ 18 | 19 | import torch 20 | 21 | 22 | class AugmentBase: 23 | """ 24 | Base class for EvoAug augmentations for genomic sequences. 25 | """ 26 | def __call__(self, x): 27 | """Return an augmented version of `x`. 28 | 29 | Parameters 30 | ---------- 31 | x : torch.Tensor 32 | Batch of one-hot sequences (shape: (N, A, L)). 33 | 34 | Returns 35 | ------- 36 | torch.Tensor 37 | Batch of one-hot sequences with random augmentation applied. 38 | """ 39 | raise NotImplementedError() 40 | 41 | 42 | class RandomDeletion(AugmentBase): 43 | """Randomly deletes a contiguous stretch of nucleotides from sequences in a training 44 | batch according to a random number between a user-defined delete_min and delete_max. 45 | A different deletion is applied to each sequence. 46 | 47 | Parameters 48 | ---------- 49 | delete_min : int, optional 50 | Minimum size for random deletion (defaults to 0). 51 | delete_max : int, optional 52 | Maximum size for random deletion (defaults to 20). 53 | """ 54 | def __init__(self, delete_min=0, delete_max=20): 55 | self.delete_min = delete_min 56 | self.delete_max = delete_max 57 | 58 | def __call__(self, x): 59 | """Randomly delete segments in a set of one-hot DNA sequences. 60 | 61 | Parameters 62 | ---------- 63 | x : torch.Tensor 64 | Batch of one-hot sequences (shape: (N, A, L)). 65 | 66 | Returns 67 | ------- 68 | torch.Tensor 69 | Sequences with randomly deleted segments (padded to correct shape 70 | with random DNA) 71 | """ 72 | N, A, L = x.shape 73 | 74 | # sample random DNA 75 | a = torch.eye(A) 76 | p = torch.tensor([1/A for _ in range(A)]) 77 | padding = torch.stack([a[p.multinomial(self.delete_max, replacement=True)].transpose(0,1) for _ in range(N)]).to(x.device) 78 | 79 | # sample deletion length for each sequence 80 | delete_lens = torch.randint(self.delete_min, self.delete_max + 1, (N,)) 81 | 82 | # sample locations to delete for each sequence 83 | delete_inds = torch.randint(L - self.delete_max + 1, (N,)) # deletion must be in boundaries of seq. 
84 | 85 | # loop over each sequence 86 | x_aug = [] 87 | for seq, pad, delete_len, delete_ind in zip(x, padding, delete_lens, delete_inds): 88 | 89 | # get index of half delete_len (to pad random DNA at beginning of sequence) 90 | pad_begin_index = torch.div(delete_len, 2, rounding_mode='floor').item() 91 | 92 | # index for other half (to pad random DNA at end of sequence) 93 | pad_end_index = delete_len - pad_begin_index 94 | 95 | # removes deletion and pads beginning and end of sequence with random DNA to ensure same length 96 | x_aug.append( torch.cat([pad[:,:pad_begin_index], # random dna padding 97 | seq[:,:delete_ind], # sequence up to deletion start index 98 | seq[:,delete_ind+delete_len:], # sequence after deletion end index 99 | pad[:,self.delete_max-pad_end_index:]], # random dna padding 100 | -1)) # concatenation axis 101 | return torch.stack(x_aug) 102 | 103 | 104 | class RandomInsertion(AugmentBase): 105 | """Randomly inserts a contiguous stretch of nucleotides from sequences in a training 106 | batch according to a random number between a user-defined insert_min and insert_max. 107 | A different insertions is applied to each sequence. Each sequence is padded with random 108 | DNA to ensure same shapes. 109 | 110 | Parameters 111 | ---------- 112 | insert_min : int, optional 113 | Minimum size for random insertion, defaults to 0 114 | insert_max : int, optional 115 | Maximum size for random insertion, defaults to 20 116 | """ 117 | def __init__(self, insert_min=0, insert_max=20): 118 | self.insert_min = insert_min 119 | self.insert_max = insert_max 120 | 121 | def __call__(self, x): 122 | """Randomly inserts segments of random DNA to a set of DNA sequences. 123 | 124 | Parameters 125 | ---------- 126 | x : torch.Tensor 127 | Batch of one-hot sequences (shape: (N, A, L)). 128 | 129 | Returns 130 | ------- 131 | torch.Tensor 132 | Sequences with randomly inserts segments of random DNA. All sequences 133 | are padded with random DNA to ensure same shape. 
134 | """ 135 | N, A, L = x.shape 136 | 137 | # sample random DNA 138 | a = torch.eye(A) 139 | p = torch.tensor([1/A for _ in range(A)]) 140 | insertions = torch.stack([a[p.multinomial(self.insert_max, replacement=True)].transpose(0,1) for _ in range(N)]).to(x.device) 141 | 142 | # sample insertion length for each sequence 143 | insert_lens = torch.randint(self.insert_min, self.insert_max + 1, (N,)) 144 | 145 | # sample locations to insertion for each sequence 146 | insert_inds = torch.randint(L, (N,)) 147 | 148 | # loop over each sequence 149 | x_aug = [] 150 | for seq, insertion, insert_len, insert_ind in zip(x, insertions, insert_lens, insert_inds): 151 | 152 | # get index of half insert_len (to pad random DNA at beginning of sequence) 153 | insert_beginning_len = torch.div((self.insert_max - insert_len), 2, rounding_mode='floor').item() 154 | 155 | # index for other half (to pad random DNA at end of sequence) 156 | insert_end_len = self.insert_max - insert_len - insert_beginning_len 157 | 158 | # removes deletion and pads beginning and end of sequence with random DNA to ensure same length 159 | x_aug.append( torch.cat([insertion[:,:insert_beginning_len], # random dna padding 160 | seq[:,:insert_ind], # sequence up to insertion start index 161 | insertion[:,insert_beginning_len:insert_beginning_len+insert_len], # random insertion 162 | seq[:,insert_ind:], # sequence after insertion end index 163 | insertion[:,insert_beginning_len+insert_len:self.insert_max]], # random dna padding 164 | -1)) # concatenation axis 165 | return torch.stack(x_aug) 166 | 167 | 168 | class RandomTranslocation(AugmentBase): 169 | """Randomly cuts sequence in two pieces and shifts the order for each in a training 170 | batch. This is implemented with a roll transformation with a user-defined shift_min 171 | and shift_max. A different roll (positive or negative) is applied to each sequence. 172 | Each sequence is padded with random DNA to ensure same shapes. 173 | 174 | Parameters 175 | ---------- 176 | shift_min : int, optional 177 | Minimum size for random shift, defaults to 0. 178 | shift_max : int, optional 179 | Maximum size for random shift, defaults to 20. 180 | """ 181 | def __init__(self, shift_min=0, shift_max=20): 182 | self.shift_min = shift_min 183 | self.shift_max = shift_max 184 | 185 | def __call__(self, x): 186 | """Randomly shifts sequences in a batch, x. 187 | 188 | Parameters 189 | ---------- 190 | x : torch.Tensor 191 | Batch of one-hot sequences (shape: (N, A, L)). 192 | 193 | Returns 194 | ------- 195 | torch.Tensor 196 | Sequences with random translocations. 197 | """ 198 | N = x.shape[0] 199 | 200 | # determine size of shifts for each sequence 201 | shifts = torch.randint(self.shift_min, self.shift_max + 1, (N,)) 202 | 203 | # make some of the shifts negative 204 | ind_neg = torch.rand(N) < 0.5 205 | shifts[ind_neg] = -1 * shifts[ind_neg] 206 | 207 | # apply random shift to each sequence 208 | x_rolled = [] 209 | for i, shift in enumerate(shifts): 210 | x_rolled.append( torch.roll(x[i], shift.item(), -1) ) 211 | x_rolled = torch.stack(x_rolled).to(x.device) 212 | return x_rolled 213 | 214 | 215 | 216 | class RandomInversion(AugmentBase): 217 | """Randomly inverts a contiguous stretch of nucleotides from sequences in a training 218 | batch according to a user-defined invert_min and invert_max. A different insertions 219 | is applied to each sequence. Each sequence is padded with random DNA to ensure same 220 | shapes. 
221 | 222 | Parameters 223 | ---------- 224 | invert_min : int, optional 225 | Minimum size for random insertion, defaults to 0. 226 | invert_max : int, optional 227 | Maximum size for random insertion, defaults to 20. 228 | """ 229 | def __init__(self, invert_min=0, invert_max=20): 230 | self.invert_min = invert_min 231 | self.invert_max = invert_max 232 | 233 | def __call__(self, x): 234 | """Randomly inverts segments of random DNA to a set of one-hot DNA sequences. 235 | 236 | Parameters 237 | ---------- 238 | x : torch.Tensor 239 | Batch of one-hot sequences (shape: (N, A, L)). 240 | 241 | Returns 242 | ------- 243 | torch.Tensor 244 | Sequences with randomly inverted segments of random DNA. 245 | """ 246 | N, A, L = x.shape 247 | 248 | # set random inversion size for each seequence 249 | inversion_lens = torch.randint(self.invert_min, self.invert_max + 1, (N,)) 250 | 251 | # randomly select start location for each inversion 252 | inversion_inds = torch.randint(L - self.invert_max + 1, (N,)) # inversion must be in boundaries of seq. 253 | 254 | # apply random inversion to each sequence 255 | x_aug = [] 256 | for seq, inversion_len, inversion_ind in zip(x, inversion_lens, inversion_inds): 257 | x_aug.append( torch.cat([seq[:,:inversion_ind], # sequence up to inversion start index 258 | torch.flip(seq[:,inversion_ind:inversion_ind+inversion_len], dims=[0,1]), # reverse-complement transformation 259 | seq[:,inversion_ind+inversion_len:]], # sequence after inversion 260 | -1)) # concatenation axis 261 | return torch.stack(x_aug) 262 | 263 | 264 | 265 | class RandomMutation(AugmentBase): 266 | """Randomly mutates sequences in a training batch according to a user-defined 267 | mutate_frac. A different set of mutations is applied to each sequence. 268 | 269 | Parameters 270 | ---------- 271 | mutate_frac : float, optional 272 | Probability of mutation for each nucleotide, defaults to 0.05. 273 | """ 274 | def __init__(self, mut_frac=0.05): 275 | self.mutate_frac = mut_frac 276 | 277 | def __call__(self, x): 278 | """Randomly introduces mutations to a set of one-hot DNA sequences. 279 | 280 | Parameters 281 | ---------- 282 | x : torch.Tensor 283 | Batch of one-hot sequences (shape: (N, A, L)). 284 | 285 | Returns 286 | ------- 287 | torch.Tensor 288 | Sequences with randomly mutated DNA. 289 | """ 290 | N, A, L = x.shape 291 | 292 | # determine the number of mutations per sequence 293 | num_mutations = round(self.mutate_frac / 0.75 * L) # num. mutations per sequence (accounting for silent mutations) 294 | 295 | # randomly determine the indices to apply mutations 296 | mutation_inds = torch.argsort(torch.rand(N,L))[:, :num_mutations] # see 0 297 | 298 | # create random DNA (to serve as random mutations) 299 | a = torch.eye(A) 300 | p = torch.tensor([1/A for _ in range(A)]) 301 | mutations = torch.stack([a[p.multinomial(num_mutations, replacement=True)].transpose(0,1) for _ in range(N)]).to(x.device) 302 | 303 | # make a copy of the batch of sequences 304 | x_aug = torch.clone(x) 305 | 306 | # loop over sequences and apply mutations 307 | for i in range(N): 308 | x_aug[i,:,mutation_inds[i]] = mutations[i] 309 | return x_aug 310 | 311 | 312 | 313 | class RandomRC(AugmentBase): 314 | """Randomly applies a reverse-complement transformation to each sequence in a training 315 | batch according to a user-defined probability, rc_prob. This is applied to each sequence 316 | independently. 
317 | 
318 |     Parameters
319 |     ----------
320 |     rc_prob : float, optional
321 |         Probability to apply a reverse-complement transformation, defaults to 0.5.
322 |     """
323 |     def __init__(self, rc_prob=0.5):
324 |         """Creates random reverse-complement object usable by EvoAug.
325 |         """
326 |         self.rc_prob = rc_prob
327 | 
328 |     def __call__(self, x):
329 |         """Randomly transforms sequences in a batch with a reverse-complement transformation.
330 | 
331 |         Parameters
332 |         ----------
333 |         x : torch.Tensor
334 |             Batch of one-hot sequences (shape: (N, A, L)).
335 | 
336 |         Returns
337 |         -------
338 |         torch.Tensor
339 |             Sequences with random reverse-complements applied.
340 |         """
341 |         # make a copy of the sequence
342 |         x_aug = torch.clone(x)
343 | 
344 |         # randomly select sequences to apply rc transformation
345 |         ind_rc = torch.rand(x_aug.shape[0]) < self.rc_prob
346 | 
347 |         # apply reverse-complement transformation
348 |         x_aug[ind_rc] = torch.flip(x_aug[ind_rc], dims=[1,2])
349 |         return x_aug
350 | 
351 | 
352 | class RandomNoise(AugmentBase):
353 |     """Randomly adds Gaussian noise to a batch of sequences according to a user-defined
354 |     noise_mean and noise_std. A different sample of noise is applied to each sequence.
355 | 
356 |     Parameters
357 |     ----------
358 |     noise_mean : float, optional
359 |         Mean of the Gaussian noise, defaults to 0.0.
360 |     noise_std : float, optional
361 |         Standard deviation of the Gaussian noise, defaults to 0.2.
362 |     """
363 |     def __init__(self, noise_mean=0.0, noise_std=0.2):
364 |         self.noise_mean = noise_mean
365 |         self.noise_std = noise_std
366 | 
367 |     def __call__(self, x):
368 |         """Randomly adds Gaussian noise to a set of one-hot DNA sequences.
369 | 
370 |         Parameters
371 |         ----------
372 |         x : torch.Tensor
373 |             Batch of one-hot sequences (shape: (N, A, L)).
374 | 
375 |         Returns
376 |         -------
377 |         torch.Tensor
378 |             Sequences with random noise.
379 |         """
380 |         return x + torch.normal(self.noise_mean, self.noise_std, x.shape).to(x.device)
381 | 
--------------------------------------------------------------------------------
/docs/source/_build/usage.html:
--------------------------------------------------------------------------------
User Guide — EvoAug 0.1 documentation
User Guide


Installation

To use EvoAug, first install it using pip:

pip install evoaug
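
The examples below also import pytorch_lightning. If it is not already available, it can be installed the same way (assuming the standard PyPI package name):

pip install pytorch-lightning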

Example

Import evoaug:

from evoaug import evoaug, augment
import pytorch_lightning as pl

Define PyTorch model and modeling choices:

model = "DEFINE PYTORCH MODEL"
loss = "DEFINE PYTORCH LOSS"
optimizer_dict = "DEFINE OPTIMIZER OR OPTIMIZER DICT"
ckpt_aug_path = "path-to-aug-checkpoint.ckpt"
ckpt_finetune_path = "path-to-finetune-checkpoint.ckpt"
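
As a rough illustration of these placeholders (an assumed toy setup, not a model from the EvoAug paper), any PyTorch module, loss, and optimizer will do; the adaptive pooling layer below is one way to keep the model agnostic to the extra length added when insertion augmentations pad sequences:

import torch
import torch.nn as nn

num_targets = 1    # assumed number of prediction tasks

model = nn.Sequential(             # inputs: one-hot DNA of shape (N, 4, L)
    nn.Conv1d(4, 64, kernel_size=19, padding=9),
    nn.ReLU(),
    nn.AdaptiveAvgPool1d(1),       # pool over length so padded inputs still work
    nn.Flatten(),
    nn.Linear(64, num_targets),
)
loss = nn.MSELoss()

# per the API reference later in this document, the optimizer may be given as a class or a
# dictionary; exactly how a dictionary is interpreted is up to RobustModel.configure_optimizers
optimizer_dict = torch.optim.Adam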

Train model with augmentations:

augment_list = [
    augment.RandomDeletion(delete_min=0, delete_max=20),
    augment.RandomRC(rc_prob=0.5),
    augment.RandomInsertion(insert_min=0, insert_max=20),
    augment.RandomTranslocation(shift_min=0, shift_max=20),
    augment.RandomMutation(mut_frac=0.05),
    augment.RandomNoise(noise_mean=0, noise_std=0.2),
]

robust_model = evoaug.RobustModel(
    model,
    criterion=loss,
    optimizer=optimizer_dict,
    augment_list=augment_list,
    max_augs_per_seq=2,  # maximum number of augmentations per sequence
    hard_aug=True,  # use max_augs_per_seq, otherwise sample randomly up to max
    inference_aug=False,  # if true, keep augmentations on during inference time
)

# set up callback
callback_topmodel = pl.callbacks.ModelCheckpoint(
    monitor="val_loss", save_top_k=1, dirpath=output_dir, filename=ckpt_aug_path
)

# train model
trainer = pl.Trainer(
    gpus=1,
    max_epochs=100,
    auto_select_gpus=True,
    logger=None,
    callbacks=[callback_topmodel],  # add any other callbacks to this list
)

# pre-train model with augmentations
trainer.fit(robust_model, datamodule=data_module)

# load best model
robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_aug_path)
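
The snippets above and below also assume that a data_module and an output_dir already exist; neither is defined in this guide. A minimal sketch of a compatible setup (the class name, batch size, and toy random tensors are hypothetical stand-ins, not real one-hot DNA) could look like:

import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset

class SeqDataModule(pl.LightningDataModule):
    # hypothetical data module serving (N, 4, L) inputs and matching targets
    def __init__(self, x_train, y_train, x_valid, y_valid, batch_size=128):
        super().__init__()
        self.train_ds = TensorDataset(x_train, y_train)
        self.valid_ds = TensorDataset(x_valid, y_valid)
        self.batch_size = batch_size

    def train_dataloader(self):
        return DataLoader(self.train_ds, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.valid_ds, batch_size=self.batch_size)

# toy random tensors purely for illustration
x_train, y_train = torch.rand(1000, 4, 249), torch.rand(1000, 1)
x_valid, y_valid = torch.rand(200, 4, 249), torch.rand(200, 1)
data_module = SeqDataModule(x_train, y_train, x_valid, y_valid)

output_dir = "./checkpoints"   # assumed directory used by ModelCheckpoint above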

Fine-tune model without augmentations:

# set up fine-tuning
robust_model.finetune = True
robust_model.optimizer = "set up optimizer for fine-tuning"

# set up callback
callback_topmodel = pl.callbacks.ModelCheckpoint(
    monitor="val_loss",
    save_top_k=1,
    dirpath=output_dir,
    filename=ckpt_finetune_path,
)

# set up pytorch lightning trainer
trainer = pl.Trainer(
    gpus=1,
    max_epochs=100,
    auto_select_gpus=True,
    logger=None,
    callbacks=[callback_topmodel],  # add any other callbacks to this list
)

# fine-tune model
trainer.fit(robust_model, datamodule=data_module)

# load best fine-tuned model
robust_model = evoaug.load_model_from_checkpoint(robust_model, ckpt_finetune_path)
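
According to the evoaug.evoaug API reference reproduced later in this document, the two manual assignments above can also be made through RobustModel.finetune_mode, and the fitted model can be evaluated with the usual Lightning hooks. A hedged sketch only; the exact form expected for the optimizer argument is not specified here:

import torch

# equivalent to setting robust_model.finetune and robust_model.optimizer by hand
robust_model.finetune_mode(optimizer=torch.optim.Adam(robust_model.parameters(), lr=1e-4))

# evaluate on a held-out test split, if the data module defines test_dataloader()
trainer.test(robust_model, datamodule=data_module)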

Examples on Google Colab

DeepSTARR analysis:

https://colab.research.google.com/drive/1a2fiRPBd1xvoJf0WNiMUgTYiLTs1XETf?usp=sharing

ChIP-seq analysis:

https://colab.research.google.com/drive/1GZ8v4Tq3LQMZI30qvdhF7ZW6Kf5GDyKX?usp=sharing
--------------------------------------------------------------------------------
/docs/source/_build/_static/underscore.js:
--------------------------------------------------------------------------------
/docs/source/_build/_static/basic.css:
--------------------------------------------------------------------------------
/docs/source/_build/autoapi/evoaug/evoaug/index.html:
--------------------------------------------------------------------------------
evoaug.evoaug — EvoAug 0.1 documentation

evoaug.evoaug

Model (implemented in PyTorch Lightning) demonstrating how to use augmentations during training.

Module Contents

Classes

RobustModel
    PyTorch Lightning module to specify how augmentation should be applied to a model.

Functions

load_model_from_checkpoint(model, checkpoint_path)
    Load PyTorch Lightning model from checkpoint.

augment_max_len(augment_list)
    Determine whether insertions are applied to determine the insert_max.
class evoaug.evoaug.RobustModel(model, criterion, optimizer, augment_list=[], max_augs_per_seq=0, hard_aug=True, finetune=False, inference_aug=False)

    Bases: pytorch_lightning.core.lightning.LightningModule

    PyTorch Lightning module to specify how augmentation should be applied to a model.

    Parameters
      • model (torch.nn.Module) – PyTorch model.
      • criterion (callable) – PyTorch loss function.
      • optimizer (torch.optim.Optimizer or dict) – PyTorch optimizer as a class or dictionary.
      • augment_list (list) – List of data augmentations, each a callable class from augment.py. Default is an empty list – no augmentations.
      • max_augs_per_seq (int) – Maximum number of augmentations to apply to each sequence. Value is superseded by the number of augmentations in augment_list.
      • hard_aug (bool) – Flag to set a hard number of augmentations, otherwise the number of augmentations is set randomly up to max_augs_per_seq, default is True.
      • finetune (bool) – Flag to turn off augmentations during training, default is False.
      • inference_aug (bool) – Flag to turn on augmentations during inference, default is False.

    forward(x)
        Standard forward pass.

    configure_optimizers()
        Standard optimizer configuration.

    training_step(batch, batch_idx)
        Training step with augmentations.

    validation_step(batch, batch_idx)
        Validation step without (or with) augmentations.

    test_step(batch, batch_idx)
        Test step without (or with) augmentations.

    predict_step(batch, batch_idx)
        Prediction step without (or with) augmentations.

    _sample_aug_combos(batch_size)
        Set the number of augmentations and randomly select augmentations to apply to each sequence.

    _apply_augment(x)
        Apply augmentations to each sequence in batch, x.

    _pad_end(x)
        Add random DNA padding of length insert_max to the end of each sequence in batch.

    finetune_mode(optimizer=None)
        Turn on finetune flag – no augmentations during training.
evoaug.evoaug.load_model_from_checkpoint(model, checkpoint_path)

    Load PyTorch Lightning model from checkpoint.

    Parameters
      • model (RobustModel) – RobustModel instance.
      • checkpoint_path (str) – Path to checkpoint of model weights.

    Returns
        Object with weights and config loaded from checkpoint.

    Return type
        RobustModel
evoaug.evoaug.augment_max_len(augment_list)

    Determine whether insertions are applied to determine the insert_max, which will be applied to pad other sequences with random DNA.

    Parameters
        augment_list (list) – List of augmentations.

    Returns
        Value for insert max.

    Return type
        int
230 |
231 |
232 | 233 | 234 |
235 |
236 |
237 | 238 |
239 | 240 |
241 |

© Copyright 2021, KooLab.

242 |
243 | 244 | Built with Sphinx using a 245 | theme 246 | provided by Read the Docs. 247 | 248 | 249 |
250 |
251 |
252 |
253 |
254 | 259 | 260 | 261 | --------------------------------------------------------------------------------