├── .github └── workflows │ ├── python-app.yml │ └── python-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .source └── _static │ ├── bio.png │ ├── deepchain.png │ ├── protein.png │ ├── score_mutation.png │ ├── sequence.png │ └── transformers.png ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── biotransformers ├── __init__.py ├── bio_transformers.py ├── lightning_utils │ ├── __init__.py │ ├── data.py │ ├── models.py │ └── optimizer.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── test_accuracy.py │ ├── test_embeddings.py │ ├── test_logits.py │ ├── test_loglikelihoods.py │ └── test_mutation_score.py ├── utils │ ├── __init__.py │ ├── compute_utils.py │ ├── constant.py │ ├── deprecated.py │ ├── logger.py │ ├── msa_utils.py │ ├── tqdm_utils.py │ └── utils.py ├── version.py └── wrappers │ ├── __init__.py │ ├── esm_wrappers.py │ ├── language_model.py │ ├── rostlab_wrapper.py │ └── transformers_wrappers.py ├── data ├── fasta │ └── example_fasta.fasta └── msa │ ├── seq0_swissprot.a3m │ ├── seq10_swissprot.a3m │ ├── seq11_swissprot.a3m │ ├── seq12_swissprot.a3m │ └── seq1_swissprot.a3m ├── docs ├── Makefile ├── environment_docs.yaml ├── make.bat └── source │ ├── _build │ ├── .buildinfo │ ├── .doctrees │ │ ├── api │ │ │ └── biotransformers.doctree │ │ ├── autoapi │ │ │ ├── biotransformers │ │ │ │ ├── bio_transformers │ │ │ │ │ └── index.doctree │ │ │ │ ├── index.doctree │ │ │ │ ├── lightning_utils │ │ │ │ │ ├── data │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── index.doctree │ │ │ │ │ ├── models │ │ │ │ │ │ └── index.doctree │ │ │ │ │ └── optimizer │ │ │ │ │ │ └── index.doctree │ │ │ │ ├── tests │ │ │ │ │ ├── conftest │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── index.doctree │ │ │ │ │ ├── test_accuracy │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── test_embeddings │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── test_logits │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── test_loglikelihoods │ │ │ │ │ │ 
└── index.doctree │ │ │ │ │ └── test_msa │ │ │ │ │ │ └── index.doctree │ │ │ │ ├── utils │ │ │ │ │ ├── constant │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── deprecated │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── index.doctree │ │ │ │ │ ├── logger │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── msa_utils │ │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── tqdm_utils │ │ │ │ │ │ └── index.doctree │ │ │ │ │ └── utils │ │ │ │ │ │ └── index.doctree │ │ │ │ ├── version │ │ │ │ │ └── index.doctree │ │ │ │ └── wrappers │ │ │ │ │ ├── esm_wrappers │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── index.doctree │ │ │ │ │ ├── language_model │ │ │ │ │ └── index.doctree │ │ │ │ │ ├── rostlab_wrapper │ │ │ │ │ └── index.doctree │ │ │ │ │ └── transformers_wrappers │ │ │ │ │ └── index.doctree │ │ │ └── index.doctree │ │ ├── contributing │ │ │ ├── CHANGELOG.doctree │ │ │ └── CONTRIBUTING.doctree │ │ ├── documentation │ │ │ ├── course.doctree │ │ │ ├── logging.doctree │ │ │ ├── msa.doctree │ │ │ └── multi_gpus.doctree │ │ ├── environment.pickle │ │ ├── getting_started │ │ │ ├── install.doctree │ │ │ └── quick_start.doctree │ │ ├── index.doctree │ │ └── tutorial │ │ │ ├── embeddings.doctree │ │ │ ├── finetuning.doctree │ │ │ └── loglikelihood.doctree │ ├── 404.html │ ├── _sources │ │ ├── api │ │ │ └── biotransformers.rst.txt │ │ ├── autoapi │ │ │ ├── biotransformers │ │ │ │ ├── bio_transformers │ │ │ │ │ └── index.rst.txt │ │ │ │ ├── index.rst.txt │ │ │ │ ├── lightning_utils │ │ │ │ │ ├── data │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── index.rst.txt │ │ │ │ │ ├── models │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ └── optimizer │ │ │ │ │ │ └── index.rst.txt │ │ │ │ ├── tests │ │ │ │ │ ├── conftest │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── index.rst.txt │ │ │ │ │ ├── test_accuracy │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── test_embeddings │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── test_logits │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── test_loglikelihoods │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ └── 
test_msa │ │ │ │ │ │ └── index.rst.txt │ │ │ │ ├── utils │ │ │ │ │ ├── constant │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── deprecated │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── index.rst.txt │ │ │ │ │ ├── logger │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── msa_utils │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── tqdm_utils │ │ │ │ │ │ └── index.rst.txt │ │ │ │ │ └── utils │ │ │ │ │ │ └── index.rst.txt │ │ │ │ ├── version │ │ │ │ │ └── index.rst.txt │ │ │ │ └── wrappers │ │ │ │ │ ├── esm_wrappers │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── index.rst.txt │ │ │ │ │ ├── language_model │ │ │ │ │ └── index.rst.txt │ │ │ │ │ ├── rostlab_wrapper │ │ │ │ │ └── index.rst.txt │ │ │ │ │ └── transformers_wrappers │ │ │ │ │ └── index.rst.txt │ │ │ └── index.rst.txt │ │ ├── contributing │ │ │ ├── CHANGELOG.md.txt │ │ │ └── CONTRIBUTING.md.txt │ │ ├── documentation │ │ │ ├── course.md.txt │ │ │ ├── logging.md.txt │ │ │ ├── msa.md.txt │ │ │ └── multi_gpus.md.txt │ │ ├── getting_started │ │ │ ├── install.rst.txt │ │ │ └── quick_start.md.txt │ │ ├── index.rst.txt │ │ └── tutorial │ │ │ ├── embeddings.md.txt │ │ │ ├── finetuning.md.txt │ │ │ └── loglikelihood.md.txt │ ├── _static │ │ ├── __init__.py │ │ ├── basic.css │ │ ├── css │ │ │ ├── index.c5995385ac14fb8791e8eb36b4908be2.css │ │ │ └── theme.css │ │ ├── deepchain-small.png │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── graphviz.css │ │ ├── images │ │ │ ├── logo_binder.svg │ │ │ ├── logo_colab.png │ │ │ └── logo_jupyterhub.svg │ │ ├── jquery-3.5.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ └── index.1c5a1a01449ed65a7b51.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── sphinx-book-theme.12a9622fbb08dcb3a2a40b2c02b83a57.js │ │ ├── sphinx-book-theme.acff12b8f9c144ce68a297486a2fa670.css │ │ ├── sphinx-book-theme.css │ │ ├── tabs.css │ │ ├── tabs.js │ │ ├── underscore-1.12.0.js │ │ ├── underscore.js │ │ ├── vendor │ │ │ └── fontawesome │ │ │ │ └── 
5.13.0 │ │ │ │ ├── LICENSE.txt │ │ │ │ ├── css │ │ │ │ └── all.min.css │ │ │ │ └── webfonts │ │ │ │ ├── fa-brands-400.eot │ │ │ │ ├── fa-brands-400.svg │ │ │ │ ├── fa-brands-400.ttf │ │ │ │ ├── fa-brands-400.woff │ │ │ │ ├── fa-brands-400.woff2 │ │ │ │ ├── fa-regular-400.eot │ │ │ │ ├── fa-regular-400.svg │ │ │ │ ├── fa-regular-400.ttf │ │ │ │ ├── fa-regular-400.woff │ │ │ │ ├── fa-regular-400.woff2 │ │ │ │ ├── fa-solid-900.eot │ │ │ │ ├── fa-solid-900.svg │ │ │ │ ├── fa-solid-900.ttf │ │ │ │ ├── fa-solid-900.woff │ │ │ │ └── fa-solid-900.woff2 │ │ └── webpack-macros.html │ ├── api │ │ └── biotransformers.html │ ├── autoapi │ │ ├── biotransformers │ │ │ ├── bio_transformers │ │ │ │ └── index.html │ │ │ ├── index.html │ │ │ ├── lightning_utils │ │ │ │ ├── data │ │ │ │ │ └── index.html │ │ │ │ ├── index.html │ │ │ │ ├── models │ │ │ │ │ └── index.html │ │ │ │ └── optimizer │ │ │ │ │ └── index.html │ │ │ ├── tests │ │ │ │ ├── conftest │ │ │ │ │ └── index.html │ │ │ │ ├── index.html │ │ │ │ ├── test_accuracy │ │ │ │ │ └── index.html │ │ │ │ ├── test_embeddings │ │ │ │ │ └── index.html │ │ │ │ ├── test_logits │ │ │ │ │ └── index.html │ │ │ │ ├── test_loglikelihoods │ │ │ │ │ └── index.html │ │ │ │ └── test_msa │ │ │ │ │ └── index.html │ │ │ ├── utils │ │ │ │ ├── constant │ │ │ │ │ └── index.html │ │ │ │ ├── deprecated │ │ │ │ │ └── index.html │ │ │ │ ├── index.html │ │ │ │ ├── logger │ │ │ │ │ └── index.html │ │ │ │ ├── msa_utils │ │ │ │ │ └── index.html │ │ │ │ ├── tqdm_utils │ │ │ │ │ └── index.html │ │ │ │ └── utils │ │ │ │ │ └── index.html │ │ │ ├── version │ │ │ │ └── index.html │ │ │ └── wrappers │ │ │ │ ├── esm_wrappers │ │ │ │ └── index.html │ │ │ │ ├── index.html │ │ │ │ ├── language_model │ │ │ │ └── index.html │ │ │ │ ├── rostlab_wrapper │ │ │ │ └── index.html │ │ │ │ └── transformers_wrappers │ │ │ │ └── index.html │ │ └── index.html │ ├── contributing │ │ ├── CHANGELOG.html │ │ └── CONTRIBUTING.html │ ├── documentation │ │ ├── course.html │ │ ├── 
logging.html │ │ ├── msa.html │ │ └── multi_gpus.html │ ├── genindex.html │ ├── getting_started │ │ ├── install.html │ │ └── quick_start.html │ ├── index.html │ ├── objects.inv │ ├── py-modindex.html │ ├── search.html │ ├── searchindex.js │ └── tutorial │ │ ├── embeddings.html │ │ ├── finetuning.html │ │ └── loglikelihood.html │ ├── _static │ └── deepchain-small.png │ ├── api │ └── biotransformers.rst │ ├── conf.py │ ├── contributing │ ├── CHANGELOG.md │ └── CONTRIBUTING.md │ ├── documentation │ ├── course.md │ ├── logging.md │ ├── msa.md │ └── multi_gpus.md │ ├── getting_started │ ├── install.rst │ └── quick_start.md │ ├── images │ ├── bio.png │ └── score_mutation.jpeg │ ├── index.rst │ └── tutorial │ ├── embeddings.md │ ├── finetuning.md │ ├── loglikelihood.md │ └── mutations_score.rst ├── environment_dev.yaml ├── requirements.txt └── setup.py /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ develop ] 9 | pull_request: 10 | branches: [ develop ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.7 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: 3.7 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install flake8 pytest pytest-cov 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | - name: Lint with flake8 29 | run: | 30 | # stop the build if there are Python syntax errors or undefined names 31 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 32 | # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide 33 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=100 --statistics 34 | - name: Test with pytest 35 | run: | 36 | pip install . 37 | pytest --cov=./ --cov-report=xml 38 | - name: "Upload coverage to Codecov" 39 | uses: codecov/codecov-action@v1 40 | with: 41 | token: ${{ secrets.CODECOV_TOKEN }} 42 | fail_ci_if_error: true 43 | files: ./coverage.xml 44 | path_to_write_report: ./coverage/codecov_report.txt 45 | verbose: true 46 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created,edited] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.7' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 26 | - name: Build and publish 27 | env: 28 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 29 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 30 | run: | 31 | python setup.py sdist bdist_wheel 32 | twine upload dist/* 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS generated files 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 
| *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | deploy.md 33 | notebooks/ 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | docs/source/_build/html 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | .vscode/ 93 | logs/ 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | #model checkpoint 140 | *.pt 141 | 142 | #personal test script 143 | multigpus_embeddings.py 144 | multigpus_training.py 145 | multigpus_accuracy.py 146 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.7 3 | 4 | repos: 5 | - repo: https://github.com/ambv/black 6 | rev: 20.8b1 7 | hooks: 8 | - id: black 9 | args: 10 | - --line-length=88 11 | 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v3.4.0 14 | hooks: 15 | - id: debug-statements 16 | - id: requirements-txt-fixer 17 | - id: check-ast # Simply check whether the files parse as valid python 18 | - id: check-case-conflict # Check for files that would conflict in case-insensitive filesystems 19 | - id: check-builtin-literals # Require literal syntax when initializing empty or zero Python builtin types 20 | - id: check-docstring-first # Check a common error of defining a docstring after code 21 | - id: check-merge-conflict # Check for files that contain merge conflict strings 22 | - id: check-yaml # Check yaml files 23 | - id: end-of-file-fixer # Ensure that a file is either empty, or ends with one newline 24 | - id: mixed-line-ending # Replace or checks mixed line ending 25 | - id: trailing-whitespace # This hook 
trims trailing whitespace 26 | 27 | - repo: https://github.com/pre-commit/mirrors-mypy 28 | rev: v0.800 29 | hooks: 30 | - id: mypy 31 | args: 32 | - --no-strict-optional 33 | - --ignore-missing-imports 34 | 35 | - repo: https://gitlab.com/pycqa/flake8 36 | rev: 3.8.4 37 | hooks: 38 | - id: flake8 39 | args: 40 | - --max-line-length=88 41 | - --max-cognitive-complexity=15 42 | - --ignore=E203,E266,E501,W503 43 | additional_dependencies: 44 | - pep8-naming 45 | - flake8-builtins 46 | - flake8-comprehensions 47 | - flake8-bugbear 48 | - flake8-pytest-style 49 | - flake8-cognitive-complexity 50 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | # Optionally build your docs in additional formats such as PDF 13 | formats: 14 | - pdf 15 | 16 | # Optionally set the version of Python and requirements required to build your docs 17 | python: 18 | version: 3.7 19 | 20 | conda: 21 | environment: docs/environment_docs.yaml 22 | -------------------------------------------------------------------------------- /.source/_static/bio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepChainBio/bio-transformers/a4bf57164464f5d763129e6008dbf06263287972/.source/_static/bio.png -------------------------------------------------------------------------------- /.source/_static/deepchain.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DeepChainBio/bio-transformers/a4bf57164464f5d763129e6008dbf06263287972/.source/_static/deepchain.png -------------------------------------------------------------------------------- /.source/_static/protein.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepChainBio/bio-transformers/a4bf57164464f5d763129e6008dbf06263287972/.source/_static/protein.png -------------------------------------------------------------------------------- /.source/_static/score_mutation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepChainBio/bio-transformers/a4bf57164464f5d763129e6008dbf06263287972/.source/_static/score_mutation.png -------------------------------------------------------------------------------- /.source/_static/sequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepChainBio/bio-transformers/a4bf57164464f5d763129e6008dbf06263287972/.source/_static/sequence.png -------------------------------------------------------------------------------- /.source/_static/transformers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepChainBio/bio-transformers/a4bf57164464f5d763129e6008dbf06263287972/.source/_static/transformers.png -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change log 2 | 3 | # [0.1.3] - 2021-07-09 4 | 5 | Fixed: 6 | - Fix filtering of logits which impacts loglikelihood computation 7 | - Fix fasta file reading in compute_loglikelihood 8 | 9 | Features: 10 | - Add `normalize` mode in compute_loglikelihood. 
11 | 12 | 13 | # [0.1.3] - 2021-07-01 14 | 15 | Features: 16 | - Add msa-transformers for methods: 17 | - compute_logits 18 | - compute_embeddings 19 | - compute_probabilities 20 | - compute_accuracy 21 | 22 | Fixed: 23 | - Remove torch DataParallel wrapper. 24 | 25 | # [0.1.0] - 2021-07-01 26 | 27 | Features: 28 | - Add ray worker for multi-gpus inference 29 | 30 | Removed: 31 | - Remove torch DataParallel wrapper. 32 | 33 | # [0.0.10] - 2021-06-14 34 | Note on the release 35 | 36 | Features: 37 | - Add BIO_LOG_LEVEL environment variable to control logging messages (logger) 38 | - Check that every unique amino acid in sequences is in tokens_list (compute_probabilities) 39 | 40 | Fixed: 41 | - Add shuffling in batch_sampler (lightning_utils) 42 | - Fix tokens argument for dataloader (lightning_utils) 43 | - Fix rtd CI to separate docs and package environment. 44 | 45 | Changed: 46 | - Modified the signature of some functions to improve clarity (transformers_wrappers) 47 | - Update `train_masked` method to `finetune` (transformers_wrappers) 48 | - `compute_embeddings` with option `full` returns a list of embeddings, no matter the size (transformers_wrappers) 49 | 50 | Removed: 51 | - Remove the tokens_list argument when not necessary and tried to make its usage clearer (transformers_wrappers) 52 | - Remove functions (transformers_wrappers): 53 | - _filter_and_pool_embeddings 54 | - _split_logits 55 | - _slabels_remaping 56 | - _filter_logits 57 | - _filter_loglikelihood 58 | - _compute_accuracy 59 | - _compute_calibration 60 | 61 | 62 | # [0.0.9] - 2021-06-04 63 | 64 | Fixed: 65 | - Batch_sampler issue 66 | 67 | # [0.0.8] - 2021-06-03 68 | Note on the release 69 | 70 | Features: 71 | - Merge ESM/protbert for finetuning model with pytorch-lightning 72 | - Possibility to restore a training session. 
73 | 74 | Fixed: 75 | - Fix conflicts when saving model with DDP 76 | - Fix loading checkpoint created by pytorch-lightning 77 | 78 | 79 | # [0.0.7] - 2021-05-12 80 | Note on the release 81 | 82 | Features: 83 | - Add fasta files support for each compute function. 84 | - Add train_masked function to finetune model on custom dataset. (Only ESM for the moment, protbert is coming.) 85 | 86 | Docs: 87 | - Update documentation to add tutorial on training. 88 | 89 | Changed: 90 | - GPU is used by default if found, even if not specified. 91 | 92 | # [0.0.6] - 2021-05-24 93 | Note on the release 94 | 95 | Fixed: 96 | - Update torch dependencies to be less restrictive. The previous constraints created conflicts with other packages. 97 | 98 | # [0.0.5] - 2021-05-12 99 | 100 | Note on the release 101 | 102 | Added 103 | - added multi-gpu support for inference 104 | - added function to finetune a model on a specific dataset on multi-gpu 105 | 106 | Changed 107 | 108 | Fixed 109 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | In order to contribute to this repository you will need developer access to this repo. To know more about the project go to the [README](README.md) first. 
4 | 5 | 6 | ## Install Dev environment 7 | 8 | From the root of this repo, run 9 | `conda env create -f environment_dev.yaml` 10 | 11 | ## Pre-commit hooks 12 | 13 | Pre-commit hooks have been configured for this project using the [pre-commit](https://pre-commit.com/) library: 14 | 15 | - [black](https://github.com/psf/black) python formatter 16 | - [flake8](https://flake8.pycqa.org/en/latest/) python linter 17 | - [isort](https://pypi.org/project/isort/) sorts imports 18 | 19 | To get them going on your side, make sure to have python installed, and run the following 20 | commands from the root directory of this repository: 21 | 22 | ```bash 23 | pip install pre-commit 24 | pre-commit install 25 | pre-commit run --all-files 26 | ``` 27 | 28 | # Git conventions 29 | 30 | - This section relies on the [Commit Message Guidelines](https://github.com/angular/angular/blob/master/CONTRIBUTING.md#commit) 31 | - It provides conventions to write commit messages based on the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) 32 | 33 | - It aims to: 34 | - Get a well-structured and easily understandable git history 35 | - Generate changelogs easily for each release since we can use scripts that parse the commit messages 36 | 37 | 38 | The commit messages must have the following structure: 39 | 40 | ``` 41 | (): 42 | 43 | 44 | 45 |