├── .github └── workflows │ └── publish-to-test-pypi.yml ├── .gitignore ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── README.md ├── divbrowse ├── README.md ├── __init__.py ├── _version.py ├── brapi │ ├── __init__.py │ └── v2 │ │ ├── __init__.py │ │ ├── allelematrix.py │ │ ├── blueprint.py │ │ └── variants.py ├── cli.py ├── divbrowse.config.yml.example ├── divbrowse.config.yml.skeleton ├── lib │ ├── __init__.py │ ├── analysis.py │ ├── annotation_data.py │ ├── genotype_data.py │ ├── utils.py │ └── variant_calls_slice.py ├── server.py ├── setup.py └── wsgi.py ├── docs ├── Makefile ├── make.bat └── source │ ├── conf.py │ ├── configuration.rst │ ├── images │ ├── divbrowse_logo.png │ ├── divbrowse_main_gui_screenshot.png │ ├── paper_figures_general_architecture.png │ └── paper_figures_usage_concept.png │ ├── index.rst │ ├── installation.rst │ ├── introduction.rst │ ├── javascript_api.rst │ ├── tutorial.rst │ └── usage.rst ├── environment.yml ├── examples ├── homo_sapiens │ └── divbrowse.config.yml ├── hordeum_vulgare │ └── divbrowse.config.yml └── mus_musculus │ └── divbrowse.config.yml ├── frontend ├── .vscode │ └── extensions.json ├── README.md ├── index.html ├── jsconfig.json ├── package.json ├── src │ ├── App.svelte │ ├── app.css │ ├── components │ │ ├── ModalMulti.svelte │ │ ├── Modals.svelte │ │ ├── Navigation.svelte │ │ ├── Tippy.svelte │ │ ├── modals │ │ │ ├── Blast.svelte │ │ │ ├── Clustermap.svelte │ │ │ ├── DataAnalysis.svelte │ │ │ ├── DataAnalysisAndExport.svelte │ │ │ ├── DataSummary.svelte │ │ │ ├── DummyModal.svelte │ │ │ ├── GeneDetails.svelte │ │ │ ├── GeneSearch.svelte │ │ │ ├── GeneSearchModalTable.svelte │ │ │ ├── GeneSearchModalTableFast.svelte │ │ │ ├── GeneSearchModalTable__backup.svelte │ │ │ ├── SelectVariantsComponent.svelte │ │ │ ├── Settings.svelte │ │ │ ├── SnpEffAnnotation.svelte │ │ │ ├── SortSamples.svelte │ │ │ └── VariantFilter.svelte │ │ ├── tracks │ │ │ ├── ChromosomeMinimap.svelte │ │ │ ├── GenomicRegion.svelte │ │ │ ├── GenomicRegionGrid.svelte │ │ │ ├── HeterozygousCallsFrequencyHeatmap.svelte │ │ │ ├── MeanPairwiseDifferenceHeatmap.svelte │ │ │ ├── MinorAlleleFrequencyHeatmap.svelte │ │ │ ├── Positions.svelte │ │ │ ├── Reference.svelte │ │ │ ├── SampleVariants.svelte │ │ │ ├── SampleVariantsMinimap.svelte │ │ │ └── SnpEff.svelte │ │ └── utils │ │ │ └── LoadingAnimation.svelte │ ├── lib │ │ ├── Controller.js │ │ └── DataLoader.js │ ├── main.js │ ├── renderer │ │ └── RendererGapless.svelte │ ├── utils │ │ ├── eventbus.js │ │ ├── helpers.js │ │ ├── logging.js │ │ └── store.js │ ├── vendor │ │ └── plotly.min.js │ └── vite-env.d.ts └── vite.config.js └── pyproject.toml /.github/workflows/publish-to-test-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python distribution to TestPyPI 2 | on: push 3 | jobs: 4 | build-n-publish: 5 | name: Publish Python distribution to TestPyPI 6 | runs-on: ubuntu-20.04 7 | steps: 8 | - uses: actions/checkout@master 9 | - name: Set up Python 3.9 10 | uses: actions/setup-python@v2 11 | with: 12 | python-version: '3.9' 13 | - name: Change version in pyproject.toml to a dev version string 14 | run: | 15 | version=$(awk -F'[ ="]+' '$1 == "version" { print $2 }' pyproject.toml) 16 | echo $version 17 | sed -i "s/$version/$version.dev0/g" pyproject.toml 18 | cat pyproject.toml 19 | - name: Install pypa/build 20 | run: >- 21 | python -m 22 | pip install 23 | build 24 | --user 25 | - name: Build a binary wheel and a source tarball 26 | run: >- 27 | python -m 28 | build 29 | --sdist 30 | --wheel 31 | --outdir dist/ 32 | - name: Publish distribution to Test PyPI 33 | uses: pypa/gh-action-pypi-publish@master 34 | with: 35 | verify_metadata: false 36 | skip_existing: true 37 | verbose: true 38 | user: __token__ 39 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 40 | repository_url: https://test.pypi.org/legacy/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # frontend 2 | /frontend/node_modules/ 3 | /frontend/package-lock.json 4 | /frontend/dist/ 5 | /frontend/dist-ssr/ 6 | /frontend/*.local 7 | .DS_Store 8 | 9 | #backend 10 | divbrowse/static/ 11 | 12 | # poetry 13 | poetry.lock 14 | 15 | 16 | 17 | # Logs 18 | logs 19 | *.log 20 | npm-debug.log* 21 | yarn-debug.log* 22 | yarn-error.log* 23 | pnpm-debug.log* 24 | lerna-debug.log* 25 | 26 | # Diagnostic reports (https://nodejs.org/api/report.html) 27 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 28 | 29 | # Runtime data 30 | pids 31 | *.pid 32 | *.seed 33 | *.pid.lock 34 | 35 | # Directory for instrumented libs generated by jscoverage/JSCover 36 | lib-cov 37 | 38 | # Coverage directory used by tools like istanbul 39 | coverage 40 | *.lcov 41 | 42 | # nyc test coverage 43 | .nyc_output 44 | 45 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 46 | .grunt 47 | 48 | # Bower dependency directory (https://bower.io/) 49 | bower_components 50 | 51 | # node-waf configuration 52 | .lock-wscript 53 | 54 | # Compiled binary addons (https://nodejs.org/api/addons.html) 55 | build/Release 56 | 57 | # Dependency directories 58 | node_modules/ 59 | jspm_packages/ 60 | 61 | # TypeScript v1 declaration files 62 | typings/ 63 | 64 | # TypeScript cache 65 | *.tsbuildinfo 66 | 67 | # Optional npm cache directory 68 | .npm 69 | 70 | # Optional eslint cache 71 | .eslintcache 72 | 73 | # Microbundle cache 74 | .rpt2_cache/ 75 | .rts2_cache_cjs/ 76 | .rts2_cache_es/ 77 | .rts2_cache_umd/ 78 | 79 | # Optional REPL history 80 | .node_repl_history 81 | 82 | # Output of 'npm pack' 83 | *.tgz 84 | 85 | # Yarn Integrity file 86 | .yarn-integrity 87 | 88 | # dotenv environment variables file 89 | .env 90 | .env.test 91 | 92 | # parcel-bundler cache (https://parceljs.org/) 93 | .cache 94 | 95 | # Next.js build output 96 | .next 97 | 98 | # Nuxt.js build / generate output 99 | .nuxt 100 | dist 101 | 102 | # Gatsby files 103 | .cache/ 104 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 105 | # https://nextjs.org/blog/next-9-1#public-directory-support 106 | # public 107 | 108 | # vuepress build output 109 | .vuepress/dist 110 | 111 | # Serverless directories 112 | .serverless/ 113 | 114 | # FuseBox cache 115 | .fusebox/ 116 | 117 | # DynamoDB Local files 118 | .dynamodb/ 119 | 120 | # TernJS port file 121 | .tern-port 122 | 123 | # Editor directories and files 124 | .vscode/* 125 | !.vscode/extensions.json 126 | .idea 127 | .DS_Store 128 | *.suo 129 | *.ntvs* 130 | *.njsproj 131 | *.sln 132 | *.sw? -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | image: testing 5 | 6 | sphinx: 7 | configuration: docs/source/conf.py 8 | 9 | python: 10 | version: 3.9 11 | install: 12 | - method: pip 13 | path: . 14 | extra_requirements: 15 | - docs -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: >- 6 | DivBrowse - interactive visualization and 7 | exploratory data analysis of variant call matrices 8 | message: >- 9 | If you use this software, please cite it using the 10 | metadata from this file. 11 | preferred-citation: 12 | authors: 13 | - given-names: Patrick 14 | family-names: König 15 | email: koenig@ipk-gatersleben.de 16 | orcid: 'https://orcid.org/0000-0002-8948-6793' 17 | affiliation: >- 18 | Leibniz Institute of Plant Genetics and Crop 19 | Plant Research (IPK) Gatersleben, 06466 20 | Seeland, Germany 21 | - given-names: Sebastian 22 | family-names: Beier 23 | orcid: 'https://orcid.org/0000-0002-2177-8781' 24 | affiliation: >- 25 | Leibniz Institute of Plant Genetics and Crop 26 | Plant Research (IPK) Gatersleben, 06466 27 | Seeland, Germany 28 | email: s.beier@fz-juelich.de 29 | - given-names: Martin 30 | family-names: Mascher 31 | email: mascher@ipk-gatersleben.de 32 | affiliation: >- 33 | Leibniz Institute of Plant Genetics and Crop 34 | Plant Research (IPK) Gatersleben, 06466 35 | Seeland, Germany 36 | orcid: 'https://orcid.org/0000-0001-6373-6013' 37 | - given-names: Nils 38 | family-names: Stein 39 | orcid: 'https://orcid.org/0000-0003-3011-8731' 40 | affiliation: >- 41 | Leibniz Institute of Plant Genetics and Crop 42 | Plant Research (IPK) Gatersleben, 06466 43 | Seeland, Germany 44 | email: stein@ipk-gatersleben.de 45 | - given-names: Matthias 46 | family-names: Lange 47 | email: lange@ipk-gatersleben.de 48 | orcid: 'https://orcid.org/0000-0002-4316-078X' 49 | affiliation: >- 50 | Leibniz Institute of Plant Genetics and Crop 51 | Plant Research (IPK) Gatersleben, 06466 52 | Seeland, Germany 53 | - given-names: Uwe 54 | family-names: Scholz 55 | orcid: 'https://orcid.org/0000-0001-6113-3518' 56 | email: scholz@ipk-gatersleben.de 57 | affiliation: >- 58 | Leibniz Institute of Plant Genetics and Crop 59 | Plant Research (IPK) Gatersleben, 06466 60 | Seeland, Germany 61 | title: DivBrowse - interactive visualization and exploratory data analysis of variant call matrices 62 | type: article 63 | doi: 10.1093/gigascience/giad025 64 | journal: GigaScience 65 | volume: 12 66 | year: 2023 67 | type: software 68 | authors: 69 | - given-names: Patrick 70 | family-names: König 71 | email: koenig@ipk-gatersleben.de 72 | orcid: 'https://orcid.org/0000-0002-8948-6793' 73 | affiliation: >- 74 | Leibniz Institute of Plant Genetics and Crop 75 | Plant Research (IPK) Gatersleben, 06466 76 | Seeland, Germany 77 | - given-names: Sebastian 78 | family-names: Beier 79 | orcid: 'https://orcid.org/0000-0002-2177-8781' 80 | affiliation: >- 81 | Leibniz Institute of Plant Genetics and Crop 82 | Plant Research (IPK) Gatersleben, 06466 83 | Seeland, Germany 84 | email: s.beier@fz-juelich.de 85 | - given-names: Martin 86 | family-names: Mascher 87 | email: mascher@ipk-gatersleben.de 88 | affiliation: >- 89 | Leibniz Institute of Plant Genetics and Crop 90 | Plant Research (IPK) Gatersleben, 06466 91 | Seeland, Germany 92 | orcid: 'https://orcid.org/0000-0001-6373-6013' 93 | - given-names: Nils 94 | family-names: Stein 95 | orcid: 'https://orcid.org/0000-0003-3011-8731' 96 | affiliation: >- 97 | Leibniz Institute of Plant Genetics and Crop 98 | Plant Research (IPK) Gatersleben, 06466 99 | Seeland, Germany 100 | email: stein@ipk-gatersleben.de 101 | - given-names: Matthias 102 | family-names: Lange 103 | email: lange@ipk-gatersleben.de 104 | orcid: 'https://orcid.org/0000-0002-4316-078X' 105 | affiliation: >- 106 | Leibniz Institute of Plant Genetics and Crop 107 | Plant Research (IPK) Gatersleben, 06466 108 | Seeland, Germany 109 | - given-names: Uwe 110 | family-names: Scholz 111 | orcid: 'https://orcid.org/0000-0001-6113-3518' 112 | email: scholz@ipk-gatersleben.de 113 | affiliation: >- 114 | Leibniz Institute of Plant Genetics and Crop 115 | Plant Research (IPK) Gatersleben, 06466 116 | Seeland, Germany 117 | identifiers: 118 | - type: doi 119 | value: 10.1093/gigascience/giad025 120 | description: The DivBrowse paper on Oxford Academic GigaScience Journal. 121 | - type: url 122 | value: 'https://divbrowse.ipk-gatersleben.de' 123 | description: Project homepage 124 | repository-code: 'https://github.com/IPK-BIT/divbrowse' 125 | url: 'https://divbrowse.ipk-gatersleben.de/' 126 | repository-artifact: 'https://pypi.org/project/divbrowse/' 127 | doi: 10.1093/gigascience/giad025 128 | journal: GigaScience 129 | volume: 12 130 | year: 2023 131 | abstract: >- 132 | DivBrowse is a software for interactive 133 | visualization and analysis of the diversity of 134 | genomic variants. It uses VCF and GFF3 files as 135 | data input and consists of a web server written in 136 | Python and a GUI written in Javascript. 137 | keywords: 138 | - genomics 139 | - data visualization 140 | - variation data 141 | - variant call format 142 | - biodiversity 143 | license: MIT 144 | #version: 1.0.1 145 | date-released: '2023-04-21' -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 IPK Gatersleben - Bioinformatics and Information Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 | 5 | 6 | [![PyPI](https://img.shields.io/pypi/v/divbrowse?color=blue&label=PyPI.org)](https://pypi.org/project/divbrowse/) 7 | [![Docker Image Version (latest semver)](https://img.shields.io/docker/v/ipkbit/divbrowse?color=blue&label=DockerHub)](https://hub.docker.com/r/ipkbit/divbrowse) 8 | ![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/IPK-BIT/divbrowse?color=blue&label=Github) 9 | 10 | [![Peer-reviewed paper in GigaScience Journal](https://img.shields.io/badge/DOI-10.1093%2Fgigascience%2Fgiad025-yellow)](https://doi.org/10.1093/gigascience/giad025) 11 | 12 | 13 | [![Documentation Status](https://readthedocs.org/projects/divbrowse/badge/?version=latest)](https://divbrowse.readthedocs.io/?badge=latest) 14 | [![Python](https://img.shields.io/pypi/pyversions/divbrowse.svg?color=green)](https://badge.fury.io/py/divbrowse) 15 | [![PyPI Downloads](https://img.shields.io/pypi/dm/divbrowse.svg?label=PyPI%20downloads)](https://pypi.org/project/divbrowse/) 16 | [![Libraries.io dependency status for latest release](https://img.shields.io/librariesio/release/pypi/divbrowse)](https://libraries.io/pypi/divbrowse) 17 | ![License](https://img.shields.io/github/license/IPK-BIT/divbrowse) 18 | 19 |
20 | 21 | **Website:** https://divbrowse.ipk-gatersleben.de 22 | **Documentation:** https://divbrowse.readthedocs.io 23 | **Paper:** https://doi.org/10.1093/gigascience/giad025 24 | 25 |
26 | 27 | **Table of contents:** 28 | - [About DivBrowse](#about-divbrowse) 29 | - [Installation](#installation) 30 | - [Try out DivBrowse](#try-out-divbrowse) 31 | - [Screenshots](#screenshots) 32 | - [Usage workflow concept](#usage-workflow-concept) 33 | - [Architecture](#architecture) 34 | 35 |
36 | 37 | ## About DivBrowse 38 | 39 | DivBrowse is a web application for interactive exploration and analysis of very large SNP matrices. 40 | 41 | It offers a novel approach for interactive visualization and analysis of genomic diversity data and optionally also gene annotation data. The use of standard file formats for data input supports interoperability and seamless deployment of application instances based on established bioinformatics pipelines. The possible integration into 3rd-party web applications supports interoperability and reusability. 42 | 43 | The integrated ad-hoc calculation of variant summary statistics and principal component analysis enables the user to perform interactive analysis of population structure for single genetic features like genes, exons and promoter regions. Data interoperability is achieved by the possibility to export genomic diversity data for genomic regions of interest in standardized VCF files. 44 | 45 | ## Installation 46 | 47 | The installation via pip or container images is described in the documentation: https://divbrowse.readthedocs.io/en/stable/installation.html 48 | 49 | ## Try out DivBrowse 50 | 51 | If you want to test DivBrowse please visit the demo instances listed here: 52 | https://divbrowse.ipk-gatersleben.de/#demo-instances 53 | 54 | 55 | ## Screenshots 56 | 57 | ![DivBrowse GUI](https://github.com/IPK-BIT/divbrowse/blob/main/docs/source/images/divbrowse_main_gui_screenshot.png?raw=true) 58 | 59 | 60 | ## Usage workflow concept 61 | 62 | ![Usage workflow concept](https://github.com/IPK-BIT/divbrowse/blob/main/docs/source/images/paper_figures_usage_concept.png?raw=true) 63 | 64 | 65 | ## Architecture 66 | 67 | ![Architecture](https://github.com/IPK-BIT/divbrowse/blob/main/docs/source/images/paper_figures_general_architecture.png?raw=true) -------------------------------------------------------------------------------- /divbrowse/README.md: -------------------------------------------------------------------------------- 1 | # divbrowse 2 | A web application for interactive exploration and analysis of very large SNP matrices 3 | -------------------------------------------------------------------------------- /divbrowse/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import _version 2 | __version__ = _version 3 | 4 | import sys 5 | import logging 6 | 7 | 8 | logging.basicConfig(stream=sys.stderr, level=logging.INFO) 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | logging.getLogger('numexpr').setLevel(logging.WARNING) 13 | logging.getLogger('numba').setLevel(logging.WARNING) 14 | logging.getLogger('h5py').setLevel(logging.WARNING) 15 | logging.getLogger('matplotlib').setLevel(logging.WARNING) 16 | logging.getLogger('matplotlib.font_manager').setLevel(logging.WARNING) -------------------------------------------------------------------------------- /divbrowse/_version.py: -------------------------------------------------------------------------------- 1 | _version = '1.1.0' -------------------------------------------------------------------------------- /divbrowse/brapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IPK-BIT/divbrowse/db9b5d13ae709813220ba7f25017072631a56971/divbrowse/brapi/__init__.py -------------------------------------------------------------------------------- /divbrowse/brapi/v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IPK-BIT/divbrowse/db9b5d13ae709813220ba7f25017072631a56971/divbrowse/brapi/v2/__init__.py -------------------------------------------------------------------------------- /divbrowse/brapi/v2/blueprint.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, request, jsonify 2 | 3 | from divbrowse.brapi.v2.allelematrix import BrapiAllelematrix 4 | from divbrowse.brapi.v2.variants import BrapiVariants 5 | 6 | 7 | def get_brapi_blueprint(config, gd, ad): 8 | 9 | brapi_blueprint = Blueprint('brapi', __name__) 10 | 11 | @brapi_blueprint.route("/serverinfo", methods = ['GET', 'OPTIONS']) 12 | def __serverinfo(): 13 | 14 | serverinfo = config.get('brapi', {}).get('serverinfo', {}) 15 | 16 | server_name = serverinfo.get('server_name', None) 17 | server_description = serverinfo.get('server_description', None) 18 | organization_name = serverinfo.get('organization_name', None) 19 | organization_url = serverinfo.get('organization_url', None) 20 | location = serverinfo.get('location', None) 21 | contact_email = serverinfo.get('contact_email', None) 22 | documentation_url = serverinfo.get('documentation_url', None) 23 | 24 | output = { 25 | "@context": [ 26 | "https://brapi.org/jsonld/context/metadata.jsonld" 27 | ], 28 | "metadata": { 29 | "datafiles": [], 30 | "pagination": None, 31 | "status": [ 32 | { 33 | "message": "Request accepted, response successful", 34 | "messageType": "INFO" 35 | } 36 | ] 37 | }, 38 | "result": { 39 | "calls": [ 40 | { 41 | "contentTypes": ["application/json"], 42 | "dataTypes": ["application/json"], 43 | "methods": ["GET",], 44 | "service": "serverinfo", 45 | "versions": ["2.1"] 46 | }, 47 | { 48 | "contentTypes": ["application/json"], 49 | "dataTypes": ["application/json"], 50 | "methods": ["GET",], 51 | "service": "commoncropnames", 52 | "versions": ["2.1"] 53 | }, 54 | { 55 | "contentTypes": ["application/json"], 56 | "dataTypes": ["application/json"], 57 | "methods": ["GET",], 58 | "service": "variants", 59 | "versions": ["2.1"] 60 | }, 61 | { 62 | "contentTypes": ["application/json"], 63 | "dataTypes": ["application/json"], 64 | "methods": ["GET",], 65 | "service": "allelematrix", 66 | "versions": ["2.1"] 67 | } 68 | ], 69 | "contactEmail": contact_email, 70 | "documentationURL": documentation_url, 71 | "location": location, 72 | "organizationName": organization_name, 73 | "organizationURL": organization_url, 74 | "serverDescription": server_description, 75 | "serverName": server_name 76 | } 77 | } 78 | 79 | return jsonify(output) 80 | 81 | 82 | 83 | @brapi_blueprint.route("/commoncropnames", methods = ['GET', 'OPTIONS']) 84 | def __commoncropnames(): 85 | 86 | commoncropname = config.get('brapi', {}).get('commoncropname', 'unknown') 87 | 88 | output = { 89 | "@context": [ 90 | "https://brapi.org/jsonld/context/metadata.jsonld" 91 | ], 92 | "metadata": { 93 | "datafiles": [], 94 | "pagination": { 95 | "currentPage": 0, 96 | "pageSize": 1000, 97 | "totalCount": 1, 98 | "totalPages": 1 99 | }, 100 | "status": [ 101 | { 102 | "message": "Request accepted, response successful", 103 | "messageType": "INFO" 104 | } 105 | ] 106 | }, 107 | "result": { 108 | "data": [commoncropname] 109 | } 110 | } 111 | return jsonify(output) 112 | 113 | 114 | 115 | @brapi_blueprint.route("/variants", methods = ['GET', 'OPTIONS']) 116 | def __variants(): 117 | if request.method == 'GET': 118 | pass 119 | 120 | else: 121 | #raise ApiError('Method not allowed', status_code=405) 122 | return '' 123 | 124 | brapi_variants = BrapiVariants(gd, request) 125 | 126 | return jsonify(brapi_variants.get_response_object()) 127 | 128 | 129 | 130 | @brapi_blueprint.route("/allelematrix", methods = ['GET', 'OPTIONS']) 131 | def __allelematrix(): 132 | 133 | if request.method == 'GET': 134 | pass 135 | 136 | else: 137 | #raise ApiError('Method not allowed', status_code=405) 138 | return '' 139 | 140 | brapi_allelematrix = BrapiAllelematrix(gd, request) 141 | 142 | return jsonify(brapi_allelematrix.get_response_object()) 143 | 144 | 145 | 146 | return brapi_blueprint -------------------------------------------------------------------------------- /divbrowse/brapi/v2/variants.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | #from icecream import ic 4 | 5 | class BrapiVariants(): 6 | 7 | def __init__(self, gd, request): 8 | 9 | self.gd = gd 10 | self.request = request 11 | 12 | self.status_messages = [] 13 | self.data_matrices = [] 14 | 15 | self._parse_request(request) 16 | self._count_variants() 17 | self._setup_pagination() 18 | 19 | self._add_data() 20 | 21 | self.status_messages.append({ 22 | 'message': 'Request accepted, response successful', 23 | 'messageType': 'INFO' 24 | }) 25 | 26 | 27 | def _parse_request(self, request): 28 | 29 | self.has_variant_db_id = False 30 | input = {} 31 | 32 | variant_db_id = request.args.get('variantDbId', default = '', type = str) 33 | 34 | if variant_db_id: 35 | try: 36 | variant_db_id_splitted = variant_db_id.split(':') 37 | input['chrom'] = variant_db_id_splitted[0] 38 | input['pos'] = int(variant_db_id_splitted[1]) 39 | self.has_variant_db_id = True 40 | 41 | except: 42 | self.status_messages.append({ 43 | 'message': 'variantDbId seems to be malformatted. It should have the format `chromosome:position`. Example: `1:56242`', 44 | 'messageType': 'ERROR' 45 | }) 46 | 47 | input['page'] = request.args.get('page', default = 0, type = int) 48 | input['pageSize'] = request.args.get('pageSize', default = 1000, type = int) 49 | 50 | self.input = input 51 | 52 | 53 | 54 | def _count_variants(self): 55 | 56 | self.slice_variant_calls = False 57 | 58 | if self.has_variant_db_id: 59 | self.count_variants = 1 60 | else: 61 | self.count_variants = self.gd.count_variants 62 | 63 | 64 | 65 | def _setup_pagination(self): 66 | 67 | total_pages = math.ceil(self.count_variants / self.input['pageSize']) 68 | 69 | if self.input['page'] >= total_pages: 70 | self.input['page'] = total_pages - 1 71 | 72 | self.status_messages.append({ 73 | 'message': 'Given parameter `page` was bigger than corresponding `totalPages` would allow. `currentPage` was set to the biggest possible value of '+str(self.input['page']), 74 | 'messageType': 'WARNING' 75 | }) 76 | 77 | 78 | self.pagination = { 79 | 'currentPage': self.input['page'], 80 | 'pageSize': self.input['pageSize'], 81 | 'totalCount': self.count_variants, 82 | 'totalPages': total_pages 83 | } 84 | 85 | 86 | 87 | def _add_data(self): 88 | 89 | reference_bases = None 90 | alternate_bases = None 91 | 92 | if self.has_variant_db_id: 93 | 94 | coord, lookup_type_start = self.gd.get_posidx_by_genome_coordinate(self.input['chrom'], self.input['pos']) 95 | reference_bases = self.gd.callset['variants/REF'].get_basic_selection(coord) 96 | reference_bases = [reference_bases] 97 | 98 | alternate_bases = self.gd.callset['variants/ALT'].get_basic_selection(coord).tolist() 99 | 100 | chrom = self.gd.callset['variants/CHROM'].get_basic_selection(coord).tolist() 101 | pos = self.gd.callset['variants/POS'].get_basic_selection(coord).tolist() 102 | 103 | else: 104 | 105 | coord_start = self.input['page'] * self.input['pageSize'] 106 | coord_end = coord_start + self.input['pageSize'] 107 | 108 | reference_bases = self.gd.callset['variants/REF'].get_basic_selection(slice(coord_start, coord_end)).tolist() 109 | 110 | alternate_bases = self.gd.callset['variants/ALT'].get_basic_selection(slice(coord_start, coord_end)).tolist() 111 | 112 | chrom = self.gd.callset['variants/CHROM'].get_basic_selection(slice(coord_start, coord_end)).tolist() 113 | pos = self.gd.callset['variants/POS'].get_basic_selection(slice(coord_start, coord_end)).tolist() 114 | 115 | 116 | data = [] 117 | 118 | for i, ref_base in enumerate(reference_bases): 119 | data.append({ 120 | 'additionalInfo': {}, 121 | 'referenceBases': ref_base, 122 | 'alternateBases': [x for x in alternate_bases[i] if x != ''], 123 | 'ciend': [], 124 | 'cipos': [], 125 | 'created': None, 126 | 'updated': None, 127 | 'start': pos[i], 128 | 'end': pos[i], 129 | 'svlen': None, 130 | 'externalReferences': [{}], 131 | 'filtersApplied': False, 132 | 'filtersFailed': [], 133 | 'filtersPassed': False, 134 | 'referenceDbId': None, 135 | 'referenceName': '', 136 | 'referenceSetDbId': None, 137 | 'referenceSetName': '', 138 | 'variantDbId': str(chrom[i])+':'+str(pos[i]), 139 | 'variantNames': [], 140 | 'variantSetDbId': [], 141 | 'variantType': 'SNV' 142 | }) 143 | 144 | self.data = data 145 | 146 | 147 | 148 | def get_response_object(self): 149 | 150 | brapi_response = { 151 | "@context": [ 152 | "https://brapi.org/jsonld/context/metadata.jsonld" 153 | ], 154 | "metadata": { 155 | "datafiles": [], 156 | "pagination": self.pagination, 157 | "status": self.status_messages 158 | }, 159 | "result": { 160 | "data": self.data 161 | } 162 | } 163 | 164 | return brapi_response -------------------------------------------------------------------------------- /divbrowse/cli.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | sys.setrecursionlimit(1000000) 4 | 5 | from pprint import pprint 6 | 7 | import click 8 | import yaml 9 | import gzip 10 | import allel 11 | import zarr 12 | import pandas as pd 13 | import numcodecs 14 | from waitress import serve 15 | 16 | from divbrowse import log 17 | 18 | 19 | from divbrowse import __version__ as DIVBROWSE_VERSION 20 | 21 | def get_config_skeleton(): 22 | path_config_skeleton = os.path.join(os.path.dirname(__file__), 'divbrowse.config.yml.skeleton') 23 | try: 24 | with open(path_config_skeleton) as config_file: 25 | config = yaml.full_load(config_file) 26 | except FileNotFoundError: 27 | log.error('Divbrowse config file `divbrowse.config.yml` not found in current directory!') 28 | exit(1) 29 | 30 | return config 31 | 32 | 33 | def get_chromosomes(path_zarr): 34 | callset = zarr.open_group(path_zarr, mode='r') 35 | chromosomes = pd.unique(callset['variants/CHROM'][:]).tolist() 36 | return chromosomes 37 | 38 | 39 | 40 | @click.group() 41 | @click.version_option(prog_name='DivBrowse', version=DIVBROWSE_VERSION) 42 | def main(): 43 | """This is the DivBrowse CLI""" 44 | pass 45 | 46 | 47 | @click.command() 48 | def calcsumstats(): 49 | 50 | click.echo('Starting calculation of variant summary statistics...') 51 | log.info('Starting calculation of variant summary statistics...') 52 | 53 | from divbrowse.lib.annotation_data import AnnotationData 54 | from divbrowse.lib.genotype_data import GenotypeData 55 | 56 | try: 57 | with open('divbrowse.config.yml') as config_file: 58 | config = yaml.full_load(config_file) 59 | except FileNotFoundError: 60 | log.error('Divbrowse config file `divbrowse.config.yml` not found in current directory!') 61 | exit(1) 62 | 63 | gd = GenotypeData(config) 64 | ad = AnnotationData(config, gd) 65 | 66 | vcf_header_lines = gd.get_vcf_header() 67 | print(vcf_header_lines) 68 | 69 | 70 | 71 | 72 | 73 | @click.command() 74 | @click.option('--path-vcf', help='Full path to to VCF file that should be converted to a Zarr archive') 75 | @click.option('--path-zarr', help='Full path where to save the Zarr archive') 76 | def vcf2zarr(path_vcf: str, path_zarr: str): 77 | 78 | if path_vcf == None: 79 | vcf_files = [] 80 | path = os.getcwd() 81 | 82 | with os.scandir(path) as entries: 83 | for entry in entries: 84 | if entry.is_file() and entry.name.lower().endswith(('.vcf', '.vcf.gz')): 85 | vcf_files.append(entry.path) 86 | 87 | click.secho('The following VCF files have been found in current working directory. Please choose the one you want to convert to the Zarr format:', fg='yellow') 88 | i = 0 89 | for _vcf_file in vcf_files: 90 | click.secho('['+str(i)+'] '+os.path.basename(_vcf_file), fg='yellow') 91 | i = i + 1 92 | 93 | selected_vcf_number = click.prompt(click.style('Please enter the number of the VCF file', fg='yellow'), type=int) 94 | 95 | if selected_vcf_number >= len(vcf_files): 96 | log.error('The given number for a VCF file is not valid.') 97 | exit(1) 98 | 99 | path_vcf = str(vcf_files[selected_vcf_number]) 100 | click.secho('You have selected the following VCF file:: '+path_vcf, fg='yellow') 101 | 102 | if path_zarr == None: 103 | path_zarr = path_vcf + '.zarr' 104 | 105 | 106 | vcf_header_lines = [] 107 | if path_vcf.endswith('gz'): 108 | vcf_file = gzip.open(path_vcf, mode='rt', encoding='utf-8') 109 | else: 110 | vcf_file = open(path_vcf, mode='r', buffering=0, encoding='utf-8') 111 | 112 | vcf_line = vcf_file.readline() 113 | while vcf_line: 114 | if vcf_line.startswith('##'): 115 | vcf_header_lines.append(vcf_line) 116 | vcf_line = vcf_file.readline() 117 | else: 118 | break 119 | 120 | with open("____vcf_export_header_lines____.vcf", "w") as vcf_header_lines_file: 121 | vcf_header_lines_file.write("".join(vcf_header_lines)) 122 | 123 | 124 | try: 125 | allel.vcf_to_zarr(path_vcf, path_zarr, group='/', fields='*', exclude_fields=['variants/numalt', 'variants/altlen', 'variants/is_snp'], log=sys.stdout, compressor=numcodecs.Blosc(cname='zstd', clevel=5, shuffle=False)) 126 | except ValueError as error_msg: 127 | log.error(error_msg) 128 | 129 | 130 | click.secho('Conversion to Zarr finished.', fg='green') 131 | click.secho('The Zarr archive was saved to this path: '+path_zarr, fg='green') 132 | 133 | 134 | 135 | @click.command() 136 | @click.option('--host', default='0.0.0.0', help='IP address to bind the DivBrowse server to', show_default=True) 137 | @click.option('--port', default='8080', help='Port number to bind the DivBrowse server to', show_default=True) 138 | @click.option('--infer-config', is_flag=True, help='If set: infer a basic configuration from the provided VCF and GFF/GFF3 files and do not use an existing `divbrowse.config.yml`') 139 | @click.option('--save-config', type=click.Path(file_okay=True, writable=True), help='Save the inferred configuration as a YAML file. Please provide a relative or absolute path.') 140 | def start(host: str, port: str, infer_config: bool, save_config): 141 | from divbrowse.server import create_app 142 | 143 | if infer_config: 144 | 145 | vcf_files = [] 146 | path = os.getcwd() 147 | 148 | with os.scandir(path) as entries: 149 | for entry in entries: 150 | if entry.is_file() and entry.name.lower().endswith(('.vcf', '.vcf.gz')): 151 | vcf_files.append(entry.path) 152 | 153 | click.secho('The following VCF files have been found. Please choose the one you want to visualize with DivBrowse:', fg='yellow') 154 | i = 0 155 | for _vcf_file in vcf_files: 156 | click.secho('['+str(i)+'] '+os.path.basename(_vcf_file), fg='yellow') 157 | i = i + 1 158 | 159 | selected_vcf_number = click.prompt(click.style('Please enter the number of the VCF file', fg='yellow'), type=int) 160 | 161 | if selected_vcf_number >= len(vcf_files): 162 | log.error('The given number for a VCF file is not valid.') 163 | exit(1) 164 | 165 | click.secho('Your VCF choice: '+str(vcf_files[selected_vcf_number]), fg='yellow') 166 | selected_vcf = vcf_files[selected_vcf_number] 167 | 168 | path_zarr = selected_vcf + '.zarr' 169 | if not os.path.isdir(path_zarr): 170 | allel.vcf_to_zarr(selected_vcf, path_zarr, group='/', fields='*', log=sys.stdout, compressor=numcodecs.Blosc(cname='zstd', clevel=5, shuffle=False)) # cname='zstd' 171 | 172 | 173 | gff3_files = [] 174 | with os.scandir(path) as entries: 175 | for entry in entries: 176 | if entry.is_file() and entry.name.lower().endswith(('.gff', '.gff3')): 177 | gff3_files.append(entry.path) 178 | 179 | 180 | click.secho('The following GFF/GFF3 files have been found. Please choose the one you want to visualize with DivBrowse:', fg='yellow') 181 | i = 0 182 | for _gff3_file in gff3_files: 183 | click.secho('['+str(i)+'] '+os.path.basename(_gff3_file), fg='yellow') 184 | 185 | selected_gff3_number = click.prompt(click.style('Please enter the number of the GFF/GFF3 file', fg='yellow'), type=int) 186 | 187 | if selected_gff3_number >= len(gff3_files): 188 | log.error('The given number for a GFF/GFF3 file is not valid.') 189 | exit(1) 190 | 191 | click.secho('Your GFF/GFF3 choice: '+str(gff3_files[selected_gff3_number]), fg='yellow') 192 | selected_gff3 = gff3_files[selected_gff3_number] 193 | 194 | 195 | chromosomes_vcf = get_chromosomes(path_zarr) 196 | chromosome_labels = dict(zip(chromosomes_vcf, chromosomes_vcf)) 197 | 198 | centromeres_positions = {k:'0' for k, v in chromosome_labels.items()} 199 | 200 | genes = allel.gff3_to_dataframe(selected_gff3) 201 | chromosomes_gff = genes['seqid'].unique().tolist() 202 | gff3_chromosome_labels = dict(zip(chromosomes_vcf, chromosomes_gff)) 203 | 204 | config_runtime = get_config_skeleton() 205 | 206 | config_runtime['datadir'] = path+'/' 207 | config_runtime['variants']['zarr_dir'] = os.path.basename(path_zarr) 208 | config_runtime['gff3']['filename'] = os.path.basename(gff3_files[0]) 209 | config_runtime['gff3']['feature_type_with_description'] = 'gene' 210 | config_runtime['gff3']['main_feature_types_for_genes_track'] = ['gene'] 211 | config_runtime['chromosome_labels'] = chromosome_labels 212 | config_runtime['centromeres_positions'] = centromeres_positions 213 | config_runtime['gff3_chromosome_labels'] = gff3_chromosome_labels 214 | 215 | if save_config is not None: 216 | with open(save_config, 'w') as yaml_file: 217 | yaml.dump(config_runtime, yaml_file, default_flow_style=False) 218 | 219 | click.secho('Starting DivBrowse server...', fg='green', bold=True) 220 | 221 | if host == '0.0.0.0': 222 | import socket 223 | hostname = socket.gethostname() 224 | local_ip_addr = socket.gethostbyname(hostname) 225 | url = 'http://'+local_ip_addr+':'+str(port)+'/index.html' 226 | else: 227 | url = 'http://'+str(host)+':'+str(port)+'/index.html' 228 | 229 | if infer_config: 230 | app = create_app(config_runtime=config_runtime) 231 | else: 232 | app = create_app() 233 | 234 | click.secho('DivBrowse should be available under the following URL: '+str(url), fg='green', bold=True) 235 | 236 | serve(app, host=host, port=int(port)) 237 | 238 | 239 | 240 | 241 | 242 | main.add_command(vcf2zarr) 243 | main.add_command(start) 244 | main.add_command(calcsumstats) 245 | 246 | if __name__ == '__main__': 247 | main() -------------------------------------------------------------------------------- /divbrowse/divbrowse.config.yml.example: -------------------------------------------------------------------------------- 1 | metadata: 2 | general_description: 3 | vcf_doi: 4 | vcf_reference_genome_doi: 5 | gff3_doi: 6 | 7 | 8 | # absolute path to the directory containing the data files 9 | datadir: /opt/divbrowse/data/ 10 | 11 | variants: 12 | # name of the directory containing the Zarr array converted from a VCF file 13 | zarr_dir: SNP_matrix_WGS_300_samples.vcf.zarr 14 | 15 | # filename of the mapping table to map other sample IDs on the VCF sample IDs 16 | sample_id_mapping_filename: 17 | 18 | 19 | gff3: 20 | # filename of the GFF3 file containing the gene annotations 21 | filename: 22 | 23 | additional_attributes_keys: biotype,gene_id 24 | 25 | feature_type_with_description: gene 26 | count_exon_variants: true 27 | 28 | key_confidence: 29 | key_ontology: Ontology_term 30 | main_feature_types_for_genes_track: 31 | - gene 32 | - pseudogene 33 | - ncRNA_gene 34 | external_link_ontology_term: https://www.ebi.ac.uk/QuickGO/term/{ID} 35 | external_links: 36 | - feature_attribute: ID 37 | url: https://some.external.resource.org/{FEATURE_ID} 38 | linktext: Open this gene in an external resource 39 | 40 | features: 41 | pca: true 42 | umap: true 43 | 44 | chromosome_labels: 45 | 1: 1H 46 | 2: 2H 47 | 3: 3H 48 | 4: 4H 49 | 5: 5H 50 | 6: 6H 51 | 7: 7H 52 | 0: Un 53 | 54 | 55 | gff3_chromosome_labels: 56 | 1: chr1H 57 | 2: chr2H 58 | 3: chr3H 59 | 4: chr4H 60 | 5: chr5H 61 | 6: chr6H 62 | 7: chr7H 63 | 0: chrUn 64 | 65 | 66 | centromeres_positions: 67 | 1: 205502676 68 | 2: 305853815 69 | 3: 271947776 70 | 4: 282386439 71 | 5: 205989812 72 | 6: 260041240 73 | 7: 328847863 74 | 0: 0 75 | 76 | 77 | blast: 78 | active: false 79 | galaxy_server_url: https://galaxy-web.ipk-gatersleben.de 80 | galaxy_apikey: 81 | galaxy_user: 82 | galaxy_pass: 83 | blastn: 84 | galaxy_tool_id: ncbi_blastn_wrapper_barley 85 | blast_database: morex_v3 86 | blast_type: megablast 87 | tblastn: 88 | galaxy_tool_id: ncbi_tblastn_wrapper_barley 89 | blast_database: morex_v3 90 | blast_type: tblastn-fast 91 | blast_result_to_vcf_chromosome_mapping: 92 | chr1H: chr1H 93 | chr2H: chr2H 94 | chr3H: chr3H 95 | chr4H: chr4H 96 | chr5H: chr5H 97 | chr6H: chr6H 98 | chr7H: chr7H 99 | chrUn: chrUn 100 | 101 | 102 | brapi: 103 | active: false 104 | commoncropname: barley 105 | serverinfo: 106 | server_name: BrAPI v2.1 endpoints 107 | server_description: Detailed description about this BrAPI server. 108 | organization_name: Institute of Plant Breeding, University of ... 109 | organization_url: institute-xy.org 110 | location: 111 | contact_email: 112 | documentation_url: -------------------------------------------------------------------------------- /divbrowse/divbrowse.config.yml.skeleton: -------------------------------------------------------------------------------- 1 | metadata: 2 | general_description: 3 | vcf_doi: 4 | vcf_reference_genome_doi: 5 | gff3_doi: 6 | 7 | datadir: 8 | 9 | variants: 10 | zarr_dir: my_variant_matrix.zarr 11 | sample_id_mapping_filename: 12 | 13 | gff3: 14 | filename: 15 | additional_attributes_keys: biotype,gene_id 16 | feature_type_with_description: gene 17 | count_exon_variants: false 18 | key_confidence: false 19 | key_ontology: Ontology_term 20 | main_feature_types_for_genes_track: 21 | - gene 22 | external_link_ontology_term: https://www.ebi.ac.uk/QuickGO/term/{ID} 23 | external_links: 24 | 25 | features: 26 | pca: true 27 | umap: true 28 | 29 | chromosome_labels: 30 | 31 | gff3_chromosome_labels: 32 | 33 | centromeres_positions: 34 | 35 | blast: 36 | active: false 37 | galaxy_server_url: 38 | galaxy_apikey: 39 | galaxy_user: 40 | galaxy_pass: 41 | blastn: 42 | galaxy_tool_id: 43 | blast_database: 44 | blast_type: 45 | tblastn: 46 | galaxy_tool_id: 47 | blast_database: 48 | blast_type: 49 | blast_result_to_vcf_chromosome_mapping: 50 | 51 | 52 | brapi: 53 | active: false 54 | commoncropname: 55 | serverinfo: 56 | server_name: 57 | server_description: 58 | organization_name: 59 | organization_url: 60 | location: 61 | contact_email: 62 | documentation_url: -------------------------------------------------------------------------------- /divbrowse/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IPK-BIT/divbrowse/db9b5d13ae709813220ba7f25017072631a56971/divbrowse/lib/__init__.py -------------------------------------------------------------------------------- /divbrowse/lib/analysis.py: -------------------------------------------------------------------------------- 1 | from icecream import ic 2 | from timeit import default_timer as timer 3 | 4 | import numpy as np 5 | 6 | from sklearn.decomposition import PCA 7 | from sklearn.preprocessing import RobustScaler 8 | from sklearn.metrics import pairwise_distances 9 | import umap 10 | 11 | from divbrowse import log 12 | from divbrowse.lib.variant_calls_slice import VariantCallsSlice 13 | 14 | 15 | 16 | def calculate_mean(sliced_variant_calls: np.ndarray) -> np.ndarray: 17 | """Calculate the mean for each variant of a variant matrix array holding the number of alternate alleles 18 | 19 | Note: 20 | Missing variant calls are excluded from the mean calculation 21 | 22 | Args: 23 | sliced_variant_calls (numpy.ndarray): Numpy array representing a variant matrix holding the number of alternate allele calls 24 | 25 | Returns: 26 | numpy.ndarray: Numpy array holding the means per variant 27 | """ 28 | 29 | sliced_variant_calls_missing_values_to_nan = np.where(sliced_variant_calls == -1, np.nan, sliced_variant_calls) 30 | return np.nanmean(sliced_variant_calls_missing_values_to_nan, axis=0) #, keepdims=True 31 | 32 | 33 | def impute_with_mean(sliced_variant_calls: np.ndarray) -> np.ndarray: 34 | """variant matrix array for that missing values should be imputed (replaced) with the mean for the variant 35 | 36 | Args: 37 | sliced_variant_calls (numpy.ndarray): Numpy array representing a variant matrix holding the number of alternate allele calls 38 | 39 | Returns: 40 | numpy.ndarray: Imputed version of the input variant matrix array 41 | """ 42 | 43 | imputed = np.copy(sliced_variant_calls).astype(np.float32) 44 | means = calculate_mean(sliced_variant_calls) 45 | indices_missing = np.where(sliced_variant_calls == -1) 46 | imputed[indices_missing] = np.take(means, indices_missing[1]) 47 | imputed = np.nan_to_num(imputed) 48 | return imputed 49 | 50 | 51 | 52 | class Analysis: 53 | 54 | def __init__(self, variant_calls_slice: VariantCallsSlice): 55 | self.variant_calls_slice = variant_calls_slice # self.variant_calls_slice.samples_selected_mapped 56 | self.imputed_calls = None 57 | 58 | 59 | def get_imputed_calls(self): 60 | if self.imputed_calls is None: 61 | self.imputed_calls = impute_with_mean(self.variant_calls_slice.numbers_of_alternate_alleles) 62 | 63 | return self.imputed_calls 64 | 65 | 66 | def calc_distance_to_reference(self, samples): 67 | 68 | calls_imputed = self.get_imputed_calls() 69 | 70 | ref_vec = np.zeros(self.variant_calls_slice.numbers_of_alternate_alleles.shape[1]).reshape(1, self.variant_calls_slice.numbers_of_alternate_alleles.shape[1]) 71 | 72 | start = timer() 73 | distances = pairwise_distances(calls_imputed, ref_vec, n_jobs=1, metric='hamming') 74 | distances = distances * self.variant_calls_slice.numbers_of_alternate_alleles.shape[1] 75 | distances = distances.astype(np.int16); 76 | sample_ids = np.array(self.variant_calls_slice.samples_selected_mapped).reshape(samples[self.variant_calls_slice.samples_mask].shape[0], 1) 77 | distances_combined = np.concatenate((sample_ids, distances), axis=1) 78 | log.debug("==== pairwise_distances() calculation time: %f", timer() - start) 79 | return distances_combined 80 | 81 | 82 | def calc_distance_matrix(self, samples): 83 | calls_imputed = self.get_imputed_calls() 84 | 85 | start = timer() 86 | distances = pairwise_distances(calls_imputed, n_jobs=4, metric='hamming') 87 | distances = distances * self.variant_calls_slice.numbers_of_alternate_alleles.shape[1] 88 | distances = distances.astype(np.int16); 89 | #sample_ids = np.array(self.variant_calls_slice.samples_selected_mapped).reshape(samples[self.variant_calls_slice.samples_mask].shape[0], 1) 90 | #distances_combined = np.concatenate((sample_ids, distances), axis=1) 91 | #log.debug("==== pairwise_distances() calculation time: %f", timer() - start) 92 | #return distances_combined 93 | return distances 94 | 95 | 96 | def pca(self): 97 | """Calculate a PCA for a variant matrix array 98 | 99 | Args: 100 | slice_of_variant_calls (numpy.ndarray): Numpy array representing a variant matrix holding the number of alternate allele calls 101 | 102 | Returns: 103 | numpy.ndarray: PCA result aligned with the sample IDs in the first column 104 | """ 105 | 106 | sample_ids = np.array(self.variant_calls_slice.samples_selected_mapped).reshape((-1, 1)).copy() 107 | #calls_imputed = impute_with_mean(self.variant_calls_slice.numbers_of_alternate_alleles) 108 | calls_imputed = self.get_imputed_calls() 109 | scaler = RobustScaler() 110 | calls_imputed_scaled = np.nan_to_num(scaler.fit_transform(calls_imputed)) 111 | start = timer() 112 | 113 | n_components = 10 114 | if calls_imputed_scaled.shape[1] < n_components: 115 | n_components = calls_imputed_scaled.shape[1] 116 | 117 | try: 118 | pca_model = PCA(n_components=n_components, whiten=False, svd_solver='randomized', iterated_power=6).fit(calls_imputed_scaled) 119 | pca_result = pca_model.transform(calls_imputed_scaled) 120 | log.debug("==== PCA calculation time: %f", timer() - start) 121 | pca_result_combined = np.concatenate((sample_ids, pca_result), axis=1) 122 | return pca_result_combined, pca_model.explained_variance_ratio_ 123 | 124 | except ValueError: 125 | return False 126 | 127 | 128 | def umap(self, n_neighbors=15): 129 | """Calculate UMAP for a variant matrix array 130 | 131 | Args: 132 | n_neighbors (int): `n_neighbors` parameter of umap.UMAP() method 133 | 134 | Returns: 135 | numpy.ndarray: PCA result aligned with the sample IDs in the first column 136 | """ 137 | 138 | sample_ids = np.array(self.variant_calls_slice.samples_selected_mapped).reshape((-1, 1)).copy() 139 | #calls_imputed = impute_with_mean(self.variant_calls_slice.numbers_of_alternate_alleles) 140 | calls_imputed = self.get_imputed_calls() 141 | 142 | start = timer() 143 | umap_result = umap.UMAP(n_components = 2, n_neighbors=n_neighbors, metric='euclidean', random_state=42).fit_transform(calls_imputed) # , random_state=42, densmap=True , min_dist=0.5 , dens_lambda=5 144 | log.debug("==== UMAP calculation time: %f", timer() - start) 145 | umap_result_combined = np.concatenate((sample_ids, umap_result), axis=1) 146 | return umap_result_combined -------------------------------------------------------------------------------- /divbrowse/lib/annotation_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from timeit import default_timer as timer 3 | import json 4 | 5 | import allel 6 | import numpy as np 7 | import pandas as pd 8 | import zarr 9 | #from pandarallel import pandarallel 10 | from sklearn.decomposition import PCA 11 | from sklearn.preprocessing import StandardScaler 12 | 13 | #pandarallel.initialize(nb_workers=3, progress_bar=True) 14 | 15 | 16 | class AnnotationData: 17 | 18 | def __init__(self, config, gd): 19 | 20 | self.config = config 21 | self.gd = gd 22 | self.datadir = config['datadir'] 23 | 24 | self.available = { 25 | 'gff3': False 26 | } 27 | 28 | if config['gff3']['filename']: 29 | self.path_gff3 = self.datadir + config['gff3']['filename'] 30 | if os.path.exists(self.path_gff3): 31 | self.available['gff3'] = True 32 | else: 33 | exit('ERROR: the configured path for the GFF3 file does not exist or is not accessible') 34 | 35 | self.metadata_gff3 = { 36 | 'has_gff3': self.available['gff3'], 37 | 'count_genes': 0, 38 | } 39 | self.genes_list = [] 40 | 41 | self.genes_list_json_dumped = json.dumps({'genes': False}) 42 | 43 | if self.available['gff3']: 44 | self._load_gff3_data() 45 | 46 | 47 | def _load_gff3_data(self): 48 | 49 | gff3_chromosome_labels = self.config['gff3_chromosome_labels'] 50 | self.metadata_gff3['gff3_to_vcf_chromosome_mapping'] = {str(v): str(k) for k, v in self.config['gff3_chromosome_labels'].items()} 51 | 52 | self.chrom_gff3_map = { str(key): str(value) for (key, value) in gff3_chromosome_labels.items()} 53 | 54 | key_confidence = 'primary_confidence_class' 55 | if self.config['gff3']['key_confidence'] and self.config['gff3']['key_confidence'] is not False: 56 | key_confidence = str(self.config['gff3']['key_confidence']) 57 | 58 | key_ontology = 'Ontology_term' 59 | if self.config['gff3']['key_ontology'] and self.config['gff3']['key_ontology'] is not False: 60 | key_ontology = str(self.config['gff3']['key_ontology']) 61 | 62 | gff3_attributes = ['ID', key_confidence, 'Parent', 'description', key_ontology] 63 | 64 | additional_attributes_keys = [] 65 | if self.config['gff3']['additional_attributes_keys']: 66 | additional_attributes_keys = str(self.config['gff3']['additional_attributes_keys']).split(',') 67 | gff3_attributes.extend(additional_attributes_keys) 68 | 69 | self.genes = allel.gff3_to_dataframe(self.path_gff3, attributes=gff3_attributes) 70 | self.genes.rename(columns={key_confidence: "primary_confidence_class", key_ontology: "Ontology_term"}, inplace=True) 71 | 72 | genes_only = self.genes.loc[(self.genes['type'] == 'gene')] # gene or transcript 73 | 74 | self.metadata_gff3['count_genes'] = int(len(genes_only.index)) 75 | self.metadata_gff3.update(dict(self.config['gff3'])) 76 | 77 | # list of genes with descriptions and start+end positions 78 | #genes_with_descriptions = self.genes.loc[(self.genes['type'] == self.config['gff3']['feature_type_with_description'])] 79 | 80 | genes_with_descriptions = self.genes.loc[ self.genes['type'].isin(self.config['gff3']['main_feature_types_for_genes_track']) ] 81 | #genes_with_descriptions = genes_with_descriptions.loc[ genes_with_descriptions['type'].isin(self.config['gff3']['main_feature_types_for_genes_track']) ] 82 | genes_with_descriptions = genes_with_descriptions.loc[ genes_with_descriptions['seqid'].isin(list(self.chrom_gff3_map.values())) ] 83 | 84 | 85 | def count_exon_variants(row): 86 | _chromosome_gff3 = row['seqid'] 87 | _chromosome_vcf = self.metadata_gff3['gff3_to_vcf_chromosome_mapping'][ _chromosome_gff3 ] 88 | number_of_variants = self.gd.count_variants_in_window(str(_chromosome_vcf), row['start'], row['end']) 89 | return number_of_variants 90 | 91 | def count_genic_variants(row): 92 | print(str(row['seqid'])+' / '+str(row['start'])) 93 | _chromosome_gff3 = row['seqid'] 94 | _chromosome_vcf = self.metadata_gff3['gff3_to_vcf_chromosome_mapping'][ _chromosome_gff3 ] 95 | number_of_variants = self.gd.count_variants_in_window(str(_chromosome_vcf), row['start'], row['end']) 96 | exons = self.genes.loc[(self.genes['seqid'] == row['seqid']) & (self.genes['start'] >= row['start']) & (self.genes['end'] <= row['end']) & (self.genes['type'] == 'exon')] 97 | number_of_exon_variants = exons.apply(count_exon_variants, axis=1) 98 | return pd.Series( [number_of_variants, number_of_exon_variants.sum() ], index=['number_of_variants', 'number_of_exon_variants']) 99 | 100 | 101 | if self.config['gff3']['count_exon_variants'] is True: 102 | 103 | geneStatsCacheFilename = self.datadir+'____gene_stats_.hdf5' 104 | try: 105 | gene_list = pd.read_hdf(geneStatsCacheFilename, key='s') 106 | print("++++ Loaded Pandas Dataframe for gene stats") 107 | 108 | genes_number_of_variants = gene_list[ ['number_of_variants', 'number_of_exon_variants'] ].copy() 109 | merged = pd.concat([genes_with_descriptions, genes_number_of_variants], axis=1) 110 | #gene_list = merged[ ['ID', 'seqid', 'start', 'end', 'primary_confidence_class', 'description', 'Ontology_term', 'number_of_variants', 'number_of_exon_variants'] ].copy() 111 | gene_list = merged.copy() 112 | 113 | except FileNotFoundError: 114 | start = timer() 115 | 116 | print("++++ Count variants on genes and exons..........") 117 | #genes_number_of_variants = genes_with_descriptions.parallel_apply(count_genic_variants, axis=1, result_type='expand') 118 | #genes_with_descriptions = genes_with_descriptions.iloc[0:50] 119 | genes_number_of_variants = genes_with_descriptions.apply(count_genic_variants, axis=1, result_type='expand') 120 | print("==== genes_with_descriptions.parallel_apply() calculation time: ", timer() - start) 121 | merged = pd.concat([genes_with_descriptions, genes_number_of_variants], axis=1) 122 | 123 | #gene_list = merged[ ['ID', 'seqid', 'start', 'end', 'primary_confidence_class', 'description', 'number_of_variants', 'number_of_exon_variants'] ].copy() 124 | gene_list = merged.copy() 125 | gene_list.to_hdf(geneStatsCacheFilename, key='s', mode='w', complevel=5, complib='blosc:zstd') 126 | 127 | else: 128 | gene_list = genes_with_descriptions 129 | 130 | 131 | self.genes_with_stats = gene_list 132 | genes_list = gene_list.to_dict('split') 133 | 134 | genes_grouped_by_seqid = genes_only.groupby(self.genes.seqid) 135 | list_chrom_gff3 = pd.unique(genes_only['seqid']).tolist() 136 | genes_start_positions = {} 137 | for _chr in list_chrom_gff3: 138 | _curr_group = genes_grouped_by_seqid.get_group(_chr) 139 | genes_start_positions[_chr] = pd.DataFrame(_curr_group['start'].drop_duplicates(keep='first')) 140 | genes_start_positions[_chr].set_index('start', drop=False, inplace=True) 141 | genes_start_positions[_chr] = genes_start_positions[_chr].sort_index() 142 | 143 | self.genes_list = genes_list 144 | self.genes_list_json_dumped = json.dumps({'genes': genes_list}) 145 | self.genes_grouped_by_seqid = genes_grouped_by_seqid 146 | self.genes_start_positions = genes_start_positions 147 | 148 | 149 | def get_nearest_gene_start_pos(self, chrom, pos): 150 | seqid = self.chrom_gff3_map[chrom] 151 | #nearest = self.genes_start_positions[seqid].index.get_loc(pos, method='nearest') 152 | nearest = self.genes_start_positions[seqid].index.get_indexer([pos], method='nearest')[0] 153 | start_pos = int(self.genes_start_positions[seqid].iloc[nearest].start) 154 | _genes = self.genes_grouped_by_seqid.get_group(seqid) 155 | return _genes.loc[(_genes['start'] == start_pos)] 156 | -------------------------------------------------------------------------------- /divbrowse/lib/utils.py: -------------------------------------------------------------------------------- 1 | from simplejson import JSONEncoder 2 | 3 | #from flask.json.provider import JSONProvider 4 | import orjson 5 | 6 | RED = '\033[31m' 7 | RESET = '\033[0m' 8 | 9 | def print_error(msg): 10 | print(f"{RED}ERROR: {msg}{RESET}") 11 | pass 12 | 13 | class ApiError(Exception): 14 | status_code = 200 15 | def __init__(self, message, status_code=None, payload=None): 16 | Exception.__init__(self) 17 | self.message = message 18 | if status_code is not None: 19 | self.status_code = status_code 20 | self.payload = payload 21 | 22 | def to_dict(self): 23 | rv = dict(self.payload or ()) 24 | rv['status'] = 'error' 25 | rv['message'] = self.message 26 | return rv 27 | 28 | 29 | class StrictEncoder(JSONEncoder): 30 | def __init__(self, *args, **kwargs): 31 | kwargs["allow_nan"] = False 32 | kwargs["ignore_nan"] = True 33 | super().__init__(*args, **kwargs) 34 | 35 | class ORJSONEncoder: 36 | 37 | def __init__(self, **kwargs): 38 | # eventually take into consideration when serializing 39 | self.options = kwargs 40 | 41 | def encode(self, obj): 42 | # decode back to str, as orjson returns bytes 43 | return orjson.dumps(obj, option=orjson.OPT_NON_STR_KEYS).decode('utf-8') -------------------------------------------------------------------------------- /divbrowse/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='divbrowse', 5 | packages=['divbrowse'], 6 | include_package_data=True, 7 | install_requires=[ 8 | 'flask', 9 | ], 10 | ) -------------------------------------------------------------------------------- /divbrowse/wsgi.py: -------------------------------------------------------------------------------- 1 | from divbrowse.server import create_app 2 | 3 | app = create_app() -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('.')) 16 | sys.path.insert(0, os.path.abspath('../../')) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'DivBrowse' 22 | copyright = '2023, Patrick König' 23 | author = 'Patrick König' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = '1.0.1' 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'autoapi.extension', 36 | 'sphinx.ext.napoleon', 37 | 'sphinx_click' 38 | ] 39 | 40 | 41 | autoapi_dirs = ['../../divbrowse/'] 42 | autoapi_ignore = ['*migrations*', '*/conf.py', '*/divbrowse_wsgi.py'] 43 | autoapi_add_toctree_entry = True 44 | 45 | 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # List of patterns, relative to source directory, that match files and 51 | # directories to ignore when looking for source files. 52 | # This pattern also affects html_static_path and html_extra_path. 53 | #exclude_patterns = ['_build', '*.pyc', '__pycache__'] 54 | 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | 58 | # The theme to use for HTML and HTML Help pages. See the documentation for 59 | # a list of builtin themes. 60 | # 61 | html_theme = 'sphinx_rtd_theme' # alabaster 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 66 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /docs/source/configuration.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Configuration 3 | ============= 4 | 5 | The configuration of your DivBrowse instance is managed by a YAML file: 6 | 7 | .. code-block:: yaml 8 | 9 | metadata: 10 | general_description: 11 | vcf_doi: 12 | vcf_reference_genome_doi: 13 | gff3_doi: 14 | 15 | 16 | datadir: /opt/shape/uwsgi/shape/data/ 17 | 18 | 19 | variants: 20 | zarr_dir: SNP_matrix_WGS_300_samples.vcf.zarr 21 | sample_id_mapping_filename: 22 | 23 | 24 | gff3: 25 | filename: 26 | additional_attributes_keys: biotype,gene_id 27 | feature_type_with_description: gene 28 | count_exon_variants: true 29 | key_confidence: 30 | key_ontology: Ontology_term 31 | main_feature_types_for_genes_track: 32 | - gene 33 | - pseudogene 34 | - ncRNA_gene 35 | external_link_ontology_term: https://www.ebi.ac.uk/QuickGO/term/{ID} 36 | external_links: 37 | - feature_attribute: ID 38 | url: https://some.external.resource.org/{FEATURE_ID} 39 | linktext: Open this gene in an external resource 40 | 41 | 42 | chromosome_labels: 43 | 1: 1H 44 | 2: 2H 45 | 3: 3H 46 | 4: 4H 47 | 5: 5H 48 | 6: 6H 49 | 7: 7H 50 | 0: Un 51 | 52 | 53 | gff3_chromosome_labels: 54 | 1: chr1H 55 | 2: chr2H 56 | 3: chr3H 57 | 4: chr4H 58 | 5: chr5H 59 | 6: chr6H 60 | 7: chr7H 61 | 0: chrUn 62 | 63 | 64 | centromeres_positions: 65 | 1: 205502676 66 | 2: 305853815 67 | 3: 271947776 68 | 4: 282386439 69 | 5: 205989812 70 | 6: 260041240 71 | 7: 328847863 72 | 0: 0 73 | 74 | 75 | blast: 76 | active: false 77 | galaxy_server_url: https://galaxy-web.ipk-gatersleben.de 78 | galaxy_user: 79 | galaxy_pass: 80 | galaxy_tool_id: ncbi_blastn_wrapper_barley 81 | blast_database: morex_v3 82 | blast_type: megablast 83 | blast_result_to_vcf_chromosome_mapping: 84 | chr1H: 1 85 | chr2H: 2 86 | chr3H: 3 87 | chr4H: 4 88 | chr5H: 5 89 | chr6H: 6 90 | chr7H: 7 91 | chrUn: 0 92 | 93 | 94 | 95 | .. include:: ../../backend/divbrowse.config.yml.example 96 | :literal: 97 | :code: yaml -------------------------------------------------------------------------------- /docs/source/images/divbrowse_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IPK-BIT/divbrowse/db9b5d13ae709813220ba7f25017072631a56971/docs/source/images/divbrowse_logo.png -------------------------------------------------------------------------------- /docs/source/images/divbrowse_main_gui_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IPK-BIT/divbrowse/db9b5d13ae709813220ba7f25017072631a56971/docs/source/images/divbrowse_main_gui_screenshot.png -------------------------------------------------------------------------------- /docs/source/images/paper_figures_general_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IPK-BIT/divbrowse/db9b5d13ae709813220ba7f25017072631a56971/docs/source/images/paper_figures_general_architecture.png -------------------------------------------------------------------------------- /docs/source/images/paper_figures_usage_concept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IPK-BIT/divbrowse/db9b5d13ae709813220ba7f25017072631a56971/docs/source/images/paper_figures_usage_concept.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to DivBrowse's documentation! 2 | ===================================== 3 | 4 | .. image:: https://readthedocs.org/projects/divbrowse/badge/?version=latest 5 | :target: https://divbrowse.readthedocs.io/?badge=latest 6 | :alt: Documentation Status 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | :caption: Contents: 11 | 12 | introduction.rst 13 | installation.rst 14 | usage.rst 15 | javascript_api.rst 16 | tutorial.rst 17 | configuration.rst 18 | API Reference <../autoapi#http://> 19 | 20 | 21 | Indices and tables 22 | ================== 23 | 24 | * :ref:`genindex` 25 | * :ref:`search` 26 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | Installation via pip 6 | ==================== 7 | 8 | It is strongly recommended to create a virtual environment beforehand, e.g. via conda or virtualenv. 9 | 10 | You can then install DivBrowse from PyPI via pip:: 11 | 12 | $ pip install divbrowse 13 | 14 | 15 | 16 | Installation via Docker or Podman 17 | ================================= 18 | 19 | Pull the image:: 20 | 21 | $ sudo docker pull ipkbit/divbrowse 22 | 23 | or with Podman:: 24 | 25 | $ podman pull docker.io/ipkbit/divbrowse 26 | 27 | Start a container from the image:: 28 | 29 | $ sudo docker run --name divbrowse -v /home/myusername/divbrowse_data:/opt/divbrowse/:Z -it -p 8080:8080 ipkbit/divbrowse:latest 30 | 31 | or with Podman:: 32 | 33 | $ podman run --name divbrowse -v /home/myusername/divbrowse_data:/opt/divbrowse/:Z -it -p 8080:8080 ipkbit/divbrowse:latest 34 | 35 | 36 | 37 | 38 | Installation from sources on Github via conda 39 | ============================================= 40 | 41 | You can install the latest version from the main branch of the GitHub repository with the following shell commands:: 42 | 43 | $ git clone https://github.com/IPK-BIT/divbrowse 44 | $ cd divbrowse 45 | $ conda env create -f environment.yml -------------------------------------------------------------------------------- /docs/source/introduction.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Introduction 3 | ============ 4 | 5 | What is DivBrowse? 6 | ================== 7 | 8 | DivBrowse is a web-based tool to visualize and analyse very large SNP matrices based on VCF files. -------------------------------------------------------------------------------- /docs/source/javascript_api.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Javascript API 3 | ============== 4 | 5 | DivBrowse's Javascript API can be used to control the samples to be displayed. 6 | It also provides the possibility to get all sample IDs that have been selected in the scatterplot of a PCA/UMAP data analysis. 7 | 8 | Control the displayed samples 9 | ============================= 10 | 11 | First you need to instantiate the DivBrowse instance: 12 | 13 | 14 | .. code-block:: javascript 15 | 16 | document.addEventListener("DOMContentLoaded", function(event) { 17 | const config = { 18 | apiBaseUrl: 'http://divbrowse.myinstitute.org' 19 | } 20 | const divbrowse_instance = divbrowse.startApp('divbrowse-container', config); 21 | }); 22 | 23 | 24 | Then your are able to set a list with sample IDs to be displayed in DivBrowse via the setSamples() method: 25 | 26 | .. code-block:: javascript 27 | 28 | const samples = [ 29 | { id: 'BRIDGE_WGS_FT219' }, 30 | { id: 'BRIDGE_WGS_FT262' }, 31 | { id: 'BRIDGE_WGS_FT340' } 32 | ]; 33 | 34 | divbrowse_instance.setSamples(samples); 35 | 36 | 37 | Control the displayed samples and change displayed labels of the samples 38 | ======================================================================== 39 | 40 | Sometimes the internal sample IDs in the VCF files are either not human readable or are different from what you want to display to the user. 41 | In this case, you can change the display name of each sample as follows: 42 | 43 | .. code-block:: javascript 44 | 45 | const samples = [ 46 | { id: 'BRIDGE_WGS_FT219', displayName: 'FT 219' }, 47 | { id: 'BRIDGE_WGS_FT262', displayName: 'FT 262' }, 48 | { id: 'BRIDGE_WGS_FT340', displayName: 'FT 340' } 49 | ]; 50 | 51 | divbrowse_instance.setSamples(samples); 52 | 53 | 54 | Control the displayed samples and provided a link as label 55 | ========================================================== 56 | 57 | It is also possible to apply HTML-tags as labels for the displayed samples: 58 | 59 | .. code-block:: javascript 60 | 61 | const samples = [ 62 | { id: 'BRIDGE_WGS_FT219', link: 'FT 219' }, 63 | { id: 'BRIDGE_WGS_FT262', link: 'FT 262' }, 64 | { id: 'BRIDGE_WGS_FT340', link: 'FT 340' } 65 | ]; 66 | 67 | divbrowse_instance.setSamples(samples); 68 | 69 | 70 | Getting back IDs of samples that have been selected in a scatterplot 71 | ==================================================================== 72 | 73 | Users of DivBrowse can perform dimensionality reduction of variant calls. In the resulting scatterplots, the user can select a range of 74 | samples. DivBrowse's Javascript API provides a callback function that is automatically called when the user makes a selection of samples. 75 | You can use this callback function as follows: 76 | 77 | 78 | .. code-block:: javascript 79 | 80 | const samplesSelectedCallback = (selectedSamples) => { 81 | /* 82 | The function argument `selectedSamples` is an array 83 | of sample IDs that have been selected in the scatterplot 84 | */ 85 | console.log('The following samples have been selected in DivBrowse: ', selectedSamples); 86 | } 87 | 88 | document.addEventListener("DOMContentLoaded", function(event) { 89 | const config = { 90 | apiBaseUrl: 'http://divbrowse.myinstitute.org', 91 | samplesSelectedCallback: samplesSelectedCallback 92 | } 93 | const divbrowse_instance = divbrowse.startApp('divbrowse-container', config); 94 | }); -------------------------------------------------------------------------------- /docs/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | ========================================================== 2 | Tutorial - How to setup a DivBrowse instance for your data 3 | ========================================================== 4 | 5 | Here we describe the setup of a DivBrowse instance with a VCF file of Homo sapiens step by step. 6 | 7 | Please make sure that you have installed DivBrowse properly. See :doc:`installation` for more information about the installation. 8 | 9 | 10 | Setup a directory structure for the new instance 11 | ================================================ 12 | 13 | - Create a new project directory for your DivBrowse instance: :: 14 | 15 | $ mkdir ~/divbrowse_instance_homo_sapiens 16 | $ cd ~/divbrowse_instance_homo_sapiens 17 | 18 | - Create a new directory `data` within your previously created project directory and switch to this new directory: :: 19 | 20 | $ mkdir data 21 | $ cd data 22 | 23 | 24 | Obtaining VCF files from the European Nucleotide Archive 25 | ======================================================== 26 | 27 | - Download the VCF files for all chromosomes from the following EBI-ENA web page: https://www.ebi.ac.uk/ena/browser/view/PRJEB30460 28 | - Concatenate all 23 VCF files into one combined VCF file with bcftools: 29 | 30 | .. code-block:: 31 | 32 | $ bcftools concat --output-type z -o ./ALL.shapeit2_integrated_v1a.GRCh38.20181129.phased.vcf.gz \ 33 | ALL.chr1.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr2.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 34 | ALL.chr3.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr4.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 35 | ALL.chr5.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr6.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 36 | ALL.chr7.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr8.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 37 | ALL.chr9.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr10.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 38 | ALL.chr11.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr12.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 39 | ALL.chr13.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr14.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 40 | ALL.chr15.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr16.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 41 | ALL.chr17.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr18.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 42 | ALL.chr19.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr20.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 43 | ALL.chr21.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz ALL.chr22.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz \ 44 | ALL.chrX.shapeit2_integrated_v1a.GRCh38.20181129.GRCh38.phased.vcf.gz 45 | 46 | - Convert the concatenated VCF file to a Zarr archive with the DivBrowse CLI: :: 47 | 48 | $ divbrowse vcf2zarr --path-vcf ./ALL.shapeit2_integrated_v1a.GRCh38.20181129.phased.vcf.gz --path-zarr ./ALL.shapeit2_integrated_v1a.GRCh38.20181129.phased.zarr 49 | 50 | 51 | 52 | Obtaining the gene annotation from ensemble.org 53 | =============================================== 54 | 55 | - Download the file ``Homo_sapiens.GRCh38.107.chr.gff3.gz`` from: http://ftp.ensembl.org/pub/release-107/gff3/homo_sapiens/ :: 56 | 57 | $ wget http://ftp.ensembl.org/pub/release-107/gff3/homo_sapiens/Homo_sapiens.GRCh38.107.chr.gff3.gz 58 | 59 | - Uncompress the gzipped file: :: 60 | 61 | $ gzip -d Homo_sapiens.GRCh38.107.chr.gff3.gz 62 | 63 | - Now you should have ``ALL.shapeit2_integrated_v1a.GRCh38.20181129.phased.zarr`` and ``Homo_sapiens.GRCh38.107.chr.gff3`` in the path ``~/divbrowse_instance_homo_sapiens/data`` 64 | 65 | 66 | Setup configuration file 67 | ======================== 68 | 69 | - Switch back to the project directory: :: 70 | 71 | $ cd ~/divbrowse_instance_homo_sapiens 72 | 73 | - And download the configuration file ``divbrowse.config.yml`` from the Github repository: :: 74 | 75 | $ wget https://raw.githubusercontent.com/IPK-BIT/divbrowse/main/examples/homo_sapiens/divbrowse.config.yml 76 | 77 | 78 | - Make sure to adjust the option ``datadir`` in the configuration file ``divbrowse.config.yml`` to your local conditions: 79 | 80 | .. code-block:: yaml 81 | 82 | datadir: /home/myusername/divbrowse_instance_homo_sapiens/data/ 83 | 84 | 85 | 86 | Start intergrated web server to serve the instance 87 | ================================================== 88 | 89 | - Now you can start the configured instance by executing the following command: :: 90 | 91 | $ divbrowse start 92 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | After installation you can use the DivBrowse CLI to start a local instance of DivBrowse on your computer. 6 | You only have to provide a *.vcf/*.vcf.gz file and a *.gff/*.gff3 file in a subdirectory. 7 | Within the subdirectory you can start a DivBrowse instance via the following command:: 8 | 9 | $ divbrowse start --infer-config 10 | 11 | An attempt is made to infer the configuration from the data provided. 12 | A Zarr archive of the provided VCF file is automatically created and saved in the same directory as the VCF file with `.zarr` appended to the original filename of the provided VCF file. 13 | 14 | If you want to improve and customize the configuration, you can take the example config YAML file `divbrowse.config.yml.example` 15 | `(click to open)`_ file from the GitHub repository, rename it to `divbrowse.config.yml` and edit it to fit your requirements. 16 | 17 | .. _(click to open): https://raw.githubusercontent.com/IPK-BIT/divbrowse/main/divbrowse/divbrowse.config.yml.example 18 | 19 | __ 20 | 21 | If you provide a manually written `divbrowse.config.yml`, you can start a DivBrowse instance using this customized configuration by executing the following CLI command in the directory consisting your custom config YAML file:: 22 | 23 | $ divbrowse start 24 | 25 | 26 | Manual conversion of VCF file to Zarr format 27 | ============================================ 28 | 29 | The CLI allows to convert VCF files to Zarr archives independently: 30 | 31 | $ divbrowse vcf2zarr --path-vcf /path/to/my_variants.vcf.gz --path-zarr /path/to/my_variants.zarr 32 | 33 | The created Zarr archive can then be used in the configuration settings file `divbrowse.config.yml` as data source for the variant data: 34 | 35 | .. code-block:: yaml 36 | 37 | datadir: /path/to/ 38 | 39 | variants: 40 | zarr_dir: my_variants.zarr 41 | 42 | 43 | DivBrowse CLI reference 44 | ======================= 45 | 46 | .. click:: divbrowse.cli:main 47 | :prog: divbrowse 48 | :nested: full 49 | :commands: start 50 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: divbrowse_dev 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - python>=3.9.0 8 | - click 9 | - numpy==1.21.1 10 | - pandas 11 | - flask 12 | - scikit-learn 13 | - scikit-allel 14 | - bioblend==0.16.0 15 | - pyyaml 16 | - zarr 17 | - pytables 18 | - waitress 19 | - simplejson 20 | - orjson 21 | - umap-learn -------------------------------------------------------------------------------- /examples/homo_sapiens/divbrowse.config.yml: -------------------------------------------------------------------------------- 1 | metadata: 2 | general_description: Biallelic SNVs called from 2,548 human samples across 26 populations from the 1000 Genomes Project, called directly against GRCh38 3 | vcf_doi: https://www.ebi.ac.uk/ena/browser/view/PRJEB30460 4 | vcf_reference_genome_doi: https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.39 5 | gff3_doi: https://www.ensembl.org/Homo_sapiens/Info/Index 6 | 7 | datadir: /opt/divbrowse/uwsgi/1000g_human/data/ 8 | 9 | variants: 10 | zarr_dir: ALL.shapeit2_integrated_v1a.GRCh38.20181129.phased.zarr 11 | sample_id_mapping_filename: 12 | 13 | 14 | gff3: 15 | filename: Homo_sapiens.GRCh38.107.chr.gff3 16 | additional_attributes_keys: biotype,gene_id 17 | key_confidence: false 18 | key_ontology: Ontology_term 19 | feature_type_with_description: gene 20 | main_feature_types_for_genes_track: 21 | - gene 22 | - pseudogene 23 | - ncRNA_gene 24 | count_exon_variants: true 25 | external_link_ontology_term: 26 | external_links: 27 | - feature_attribute: gene_id 28 | url: https://www.ensembl.org/Homo_sapiens/Gene/Summary?g={FEATURE_ID} 29 | linktext: Open this gene on ensembl.org 30 | 31 | 32 | chromosome_labels: 33 | chr1: chr1 34 | chr2: chr2 35 | chr3: chr3 36 | chr4: chr4 37 | chr5: chr5 38 | chr6: chr6 39 | chr7: chr7 40 | chr8: chr8 41 | chr9: chr9 42 | chr10: chr10 43 | chr11: chr11 44 | chr12: chr12 45 | chr13: chr13 46 | chr14: chr14 47 | chr15: chr15 48 | chr16: chr16 49 | chr17: chr17 50 | chr18: chr18 51 | chr19: chr19 52 | chr20: chr20 53 | chr21: chr21 54 | chr22: chr22 55 | chrX: chrX 56 | 57 | 58 | gff3_chromosome_labels: 59 | chr1: 1 60 | chr2: 2 61 | chr3: 3 62 | chr4: 4 63 | chr5: 5 64 | chr6: 6 65 | chr7: 7 66 | chr8: 8 67 | chr9: 9 68 | chr10: 10 69 | chr11: 11 70 | chr12: 12 71 | chr13: 13 72 | chr14: 14 73 | chr15: 15 74 | chr16: 16 75 | chr17: 17 76 | chr18: 18 77 | chr19: 19 78 | chr20: 20 79 | chr21: 21 80 | chr22: 22 81 | chrX: X 82 | 83 | 84 | centromeres_positions: 85 | chr1: 0 86 | chr2: 0 87 | chr3: 0 88 | chr4: 0 89 | chr5: 0 90 | chr6: 0 91 | chr7: 0 92 | chr8: 0 93 | chr9: 0 94 | chr10: 0 95 | chr11: 0 96 | chr12: 0 97 | chr13: 0 98 | chr14: 0 99 | chr15: 0 100 | chr16: 0 101 | chr17: 0 102 | chr18: 0 103 | chr19: 0 104 | chr20: 0 105 | chr21: 0 106 | chr22: 0 107 | chrX: 0 108 | 109 | blast: 110 | active: false 111 | galaxy_server_url: 112 | galaxy_user: 113 | galaxy_pass: 114 | galaxy_tool_id: 115 | blast_database: 116 | blast_type: megablast 117 | blast_result_to_vcf_chromosome_mapping: -------------------------------------------------------------------------------- /examples/hordeum_vulgare/divbrowse.config.yml: -------------------------------------------------------------------------------- 1 | metadata: 2 | general_description: Unimputed SNP variants for 22626 BRIDGE project panel genotypes with reference to Morex V3 3 | vcf_doi: 4 | vcf_reference_genome_doi: http://doi.org/10.5447/ipk/2021/3 5 | gff3_doi: http://doi.org/10.5447/ipk/2021/3 6 | 7 | datadir: /opt/divbrowse/uwsgi/bridge_all_samples_morex_v3/data/ 8 | 9 | variants: 10 | zarr_dir: 201006_diversity_gbs_morex_v3_bcftools_filtered_corrected_SNPEFF_annotated.vcf.zarr 11 | sample_id_mapping_filename: lut_sampleid_vcfid.csv 12 | 13 | gff3: 14 | filename: Hv_Morex.pgsb.Jul2020.gff3 15 | key_confidence: primary_confidence_class 16 | key_ontology: Ontology_term 17 | feature_type_with_description: mRNA 18 | main_feature_type_for_genes_track: mRNA 19 | count_exon_variants: false 20 | external_link_ontology_term: https://www.ebi.ac.uk/QuickGO/term/{ID} 21 | external_links: 22 | - feature_type: mRNA 23 | feature_attribute: ID 24 | url: https://apex.ipk-gatersleben.de/apex/f?p=284:58:::::P58_GENE_NAME:{FEATURE_ID} 25 | linktext: Open this gene in BARLEX 26 | 27 | 28 | chromosome_labels: 29 | chr1H: 1H 30 | chr2H: 2H 31 | chr3H: 3H 32 | chr4H: 4H 33 | chr5H: 5H 34 | chr6H: 6H 35 | chr7H: 7H 36 | chrUn: Un 37 | 38 | 39 | gff3_chromosome_labels: 40 | chr1H: chr1H 41 | chr2H: chr2H 42 | chr3H: chr3H 43 | chr4H: chr4H 44 | chr5H: chr5H 45 | chr6H: chr6H 46 | chr7H: chr7H 47 | chrUn: chrUn 48 | 49 | 50 | centromeres_positions: 51 | chr1H: 206486643 52 | chr2H: 301293086 53 | chr3H: 267852507 54 | chr4H: 276149121 55 | chr5H: 204878572 56 | chr6H: 256319444 57 | chr7H: 328847192 58 | chrUn: 0 59 | 60 | 61 | blast: 62 | active: true 63 | galaxy_server_url: https://galaxy-web.ipk-gatersleben.de 64 | galaxy_user: myusername 65 | galaxy_pass: mypassword 66 | galaxy_tool_id: ncbi_blastn_wrapper_barley 67 | blast_database: morex_v3 68 | blast_type: megablast 69 | blast_result_to_vcf_chromosome_mapping: 70 | chr1H: chr1H 71 | chr2H: chr2H 72 | chr3H: chr3H 73 | chr4H: chr4H 74 | chr5H: chr5H 75 | chr6H: chr6H 76 | chr7H: chr7H 77 | chrUn: chrUn -------------------------------------------------------------------------------- /examples/mus_musculus/divbrowse.config.yml: -------------------------------------------------------------------------------- 1 | metadata: 2 | general_description: Sanger Institute Mouse Genomes Project v5 - SNP calls from version 5 of the Mouse Genome Project at the Wellcome Trust Sanger Institute. Specifically, this project describes the variants of 36 mouse strains aligned against the reference mouse genome sequence GRCm38. 3 | vcf_doi: https://www.ebi.ac.uk/eva/?eva-study=PRJEB11471 4 | vcf_reference_genome_doi: https://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.20/ 5 | gff3_doi: https://nov2020.archive.ensembl.org/Mus_musculus/Info/Index 6 | 7 | datadir: /opt/divbrowse/uwsgi/sanger_institute_mouse_genomes_project_v5/data/ 8 | 9 | variants: 10 | zarr_dir: mgp.v5.merged.snps_all.dbSNP142.noALT_X.done.vcf.zarr 11 | sample_id_mapping_filename: 12 | 13 | 14 | gff3: 15 | filename: Mus_musculus.GRCm38.102.chr.gff3 16 | additional_attributes_keys: biotype,gene_id 17 | key_confidence: false 18 | key_ontology: Ontology_term 19 | feature_type_with_description: gene 20 | main_feature_types_for_genes_track: 21 | - gene 22 | - pseudogene 23 | - ncRNA_gene 24 | count_exon_variants: false 25 | external_link_ontology_term: 26 | external_links: 27 | - feature_attribute: gene_id 28 | url: https://nov2020.archive.ensembl.org/Mus_musculus/Gene/Summary?g={FEATURE_ID} 29 | linktext: Open this gene on ensembl.org 30 | 31 | 32 | chromosome_labels: 33 | 1: 1 34 | 2: 2 35 | 3: 3 36 | 4: 4 37 | 5: 5 38 | 6: 6 39 | 7: 7 40 | 8: 8 41 | 9: 9 42 | 10: 10 43 | 11: 11 44 | 12: 12 45 | 13: 13 46 | 14: 14 47 | 15: 15 48 | 16: 16 49 | 17: 17 50 | 18: 18 51 | 19: 19 52 | X: X 53 | Y: Y 54 | MT: MT 55 | 56 | 57 | gff3_chromosome_labels: 58 | 1: 1 59 | 2: 2 60 | 3: 3 61 | 4: 4 62 | 5: 5 63 | 6: 6 64 | 7: 7 65 | 8: 8 66 | 9: 9 67 | 10: 10 68 | 11: 11 69 | 12: 12 70 | 13: 13 71 | 14: 14 72 | 15: 15 73 | 16: 16 74 | 17: 17 75 | 18: 18 76 | 19: 19 77 | X: X 78 | Y: Y 79 | MT: MT 80 | 81 | centromeres_positions: 82 | 1: 0 83 | 2: 0 84 | 3: 0 85 | 4: 0 86 | 5: 0 87 | 6: 0 88 | 7: 0 89 | 8: 0 90 | 9: 0 91 | 10: 0 92 | 11: 0 93 | 12: 0 94 | 13: 0 95 | 14: 0 96 | 15: 0 97 | 16: 0 98 | 17: 0 99 | 18: 0 100 | 19: 0 101 | X: 0 102 | Y: 0 103 | MT: 0 104 | 105 | blast: 106 | active: false 107 | galaxy_server_url: 108 | galaxy_user: 109 | galaxy_pass: 110 | galaxy_tool_id: 111 | blast_database: 112 | blast_type: megablast 113 | blast_result_to_vcf_chromosome_mapping: -------------------------------------------------------------------------------- /frontend/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["svelte.svelte-vscode"] 3 | } 4 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # Svelte + Vite 2 | 3 | This template should help get you started developing with Svelte in Vite. 4 | 5 | ## Recommended IDE Setup 6 | 7 | [VS Code](https://code.visualstudio.com/) + [Svelte](https://marketplace.visualstudio.com/items?itemName=svelte.svelte-vscode). 8 | 9 | ## Need an official Svelte framework? 10 | 11 | Check out [SvelteKit](https://github.com/sveltejs/kit#readme), which is also powered by Vite. Deploy anywhere with its serverless-first approach and adapt to various platforms, with out of the box support for TypeScript, SCSS, and Less, and easily-added support for mdsvex, GraphQL, PostCSS, Tailwind CSS, and more. 12 | 13 | ## Technical considerations 14 | 15 | **Why use this over SvelteKit?** 16 | 17 | - It brings its own routing solution which might not be preferable for some users. 18 | - It is first and foremost a framework that just happens to use Vite under the hood, not a Vite app. 19 | 20 | This template contains as little as possible to get started with Vite + Svelte, while taking into account the developer experience with regards to HMR and intellisense. It demonstrates capabilities on par with the other `create-vite` templates and is a good starting point for beginners dipping their toes into a Vite + Svelte project. 21 | 22 | Should you later need the extended capabilities and extensibility provided by SvelteKit, the template has been structured similarly to SvelteKit so that it is easy to migrate. 23 | 24 | **Why `global.d.ts` instead of `compilerOptions.types` inside `jsconfig.json` or `tsconfig.json`?** 25 | 26 | Setting `compilerOptions.types` shuts out all other types not explicitly listed in the configuration. Using triple-slash references keeps the default TypeScript setting of accepting type information from the entire workspace, while also adding `svelte` and `vite/client` type information. 27 | 28 | **Why include `.vscode/extensions.json`?** 29 | 30 | Other templates indirectly recommend extensions via the README, but this file allows VS Code to prompt the user to install the recommended extension upon opening the project. 31 | 32 | **Why enable `checkJs` in the JS template?** 33 | 34 | It is likely that most cases of changing variable types in runtime are likely to be accidental, rather than deliberate. This provides advanced typechecking out of the box. Should you like to take advantage of the dynamically-typed nature of JavaScript, it is trivial to change the configuration. 35 | 36 | **Why is HMR not preserving my local component state?** 37 | 38 | HMR state preservation comes with a number of gotchas! It has been disabled by default in both `svelte-hmr` and `@sveltejs/vite-plugin-svelte` due to its often surprising behavior. You can read the details [here](https://github.com/rixo/svelte-hmr#svelte-hmr). 39 | 40 | If you have state that's important to retain within a component, consider creating an external store which would not be replaced by HMR. 41 | 42 | ```js 43 | // store.js 44 | // An extremely simple external store 45 | import { writable } from 'svelte/store' 46 | export default writable(0) 47 | ``` 48 | -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | DivBrowse 7 | 8 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 26 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /frontend/jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "moduleResolution": "Node", 4 | "target": "ESNext", 5 | "module": "ESNext", 6 | /** 7 | * svelte-preprocess cannot figure out whether you have 8 | * a value or a type, so tell TypeScript to enforce using 9 | * `import type` instead of `import` for Types. 10 | */ 11 | "importsNotUsedAsValues": "error", 12 | "isolatedModules": true, 13 | "resolveJsonModule": true, 14 | /** 15 | * To have warnings / errors of the Svelte compiler at the 16 | * correct position, enable source maps by default. 17 | */ 18 | "sourceMap": true, 19 | "esModuleInterop": true, 20 | "skipLibCheck": true, 21 | "forceConsistentCasingInFileNames": true, 22 | /** 23 | * Typecheck JS in `.svelte` and `.js` files by default. 24 | * Disable this if you'd like to use dynamic types. 25 | */ 26 | "checkJs": true, 27 | "baseUrl": ".", 28 | "paths": { 29 | "@/*": ["src/*"] 30 | } 31 | }, 32 | /** 33 | * Use global.d.ts instead of compilerOptions.types 34 | * to avoid limiting type declarations. 35 | */ 36 | "include": ["src/**/*.d.ts", "src/**/*.js", "src/**/*.svelte"] 37 | } 38 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend_vite", 3 | "private": true, 4 | "version": "0.0.1", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "vite build", 9 | "preview": "vite preview" 10 | }, 11 | "devDependencies": { 12 | "@rollup/plugin-replace": "^5.0.2", 13 | "@sveltejs/vite-plugin-svelte": "^2.0.0", 14 | "copyfiles": "^2.4.1", 15 | "less": "^4.1.3", 16 | "postcss": "^8.4.17", 17 | "svelte": "^3.54.0", 18 | "svelte-headless-table": "^0.16.2", 19 | "svelte-preprocess": "^5.0.0", 20 | "vite": "^4.0.0" 21 | }, 22 | "dependencies": { 23 | "@sveltejs/svelte-virtual-list": "^3.0.1", 24 | "axios": "^1.1.2", 25 | "d3-scale": "^4.0.2", 26 | "dataframe-js": "^1.4.4", 27 | "fuse.js": "^6.4.6", 28 | "lodash": "^4.17.15", 29 | "plotly.js-dist": "^2.4.1", 30 | "plotly.js-gl2d-dist": "^2.17.1", 31 | "svelte-range-slider-pips": "^2.0.3", 32 | "svelte-simple-datatables": "^0.2.3", 33 | "svelte-simple-modal": "^1.4.1", 34 | "sveltejs-tippy": "^3.0.0", 35 | "tippy.js": "^6.3.7" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /frontend/src/app.css: -------------------------------------------------------------------------------- 1 | :root { 2 | font-family: Inter, Avenir, Helvetica, Arial, sans-serif; 3 | font-size: 16px; 4 | line-height: 24px; 5 | font-weight: 400; 6 | 7 | color-scheme: light dark; 8 | color: rgba(255, 255, 255, 0.87); 9 | background-color: #242424; 10 | 11 | font-synthesis: none; 12 | text-rendering: optimizeLegibility; 13 | -webkit-font-smoothing: antialiased; 14 | -moz-osx-font-smoothing: grayscale; 15 | -webkit-text-size-adjust: 100%; 16 | } 17 | 18 | a { 19 | font-weight: 500; 20 | color: #646cff; 21 | text-decoration: inherit; 22 | } 23 | a:hover { 24 | color: #535bf2; 25 | } 26 | 27 | body { 28 | margin: 0; 29 | display: flex; 30 | place-items: center; 31 | min-width: 320px; 32 | min-height: 100vh; 33 | } 34 | 35 | h1 { 36 | font-size: 3.2em; 37 | line-height: 1.1; 38 | } 39 | 40 | .card { 41 | padding: 2em; 42 | } 43 | 44 | #app { 45 | max-width: 1280px; 46 | margin: 0 auto; 47 | padding: 20px; 48 | text-align: center; 49 | } 50 | 51 | button { 52 | border-radius: 8px; 53 | border: 1px solid transparent; 54 | padding: 0.6em 1.2em; 55 | font-size: 1em; 56 | font-weight: 500; 57 | font-family: inherit; 58 | background-color: #1a1a1a; 59 | cursor: pointer; 60 | transition: border-color 0.25s; 61 | } 62 | button:hover { 63 | border-color: #646cff; 64 | } 65 | button:focus, 66 | button:focus-visible { 67 | outline: 4px auto -webkit-focus-ring-color; 68 | } 69 | 70 | @media (prefers-color-scheme: light) { 71 | :root { 72 | color: #213547; 73 | background-color: #ffffff; 74 | } 75 | a:hover { 76 | color: #747bff; 77 | } 78 | button { 79 | background-color: #f9f9f9; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /frontend/src/components/ModalMulti.svelte: -------------------------------------------------------------------------------- 1 | 95 | 96 | 97 |
closeHighestModal()}> 98 | 99 | {#each $allModals as modal, index} 100 |
101 |
{}}> 102 | closeModal(index)} viewBox="0 0 14 14"> 103 | 104 | 105 | 106 | 107 |
108 | { closeModal(index) } } /> 109 |
110 |
111 |
112 | {/each} 113 | 114 |
115 | 116 | 117 | 118 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /frontend/src/components/Modals.svelte: -------------------------------------------------------------------------------- 1 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /frontend/src/components/Tippy.svelte: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/src/components/modals/Clustermap.svelte: -------------------------------------------------------------------------------- 1 | 66 | 67 | 68 |
69 |
Clustermap
70 | 71 |
72 | 73 | {#if showLoadingAnimation} 74 |
75 | 76 |
77 | {/if} 78 | 79 |
80 | 81 |
82 | 83 |
84 | 85 |
86 | 87 | 88 |
89 | 90 |
91 | {#if base64imageStr} 92 | clustermap 93 | {/if} 94 |
95 | 96 |
97 | 98 | -------------------------------------------------------------------------------- /frontend/src/components/modals/DataAnalysisAndExport.svelte: -------------------------------------------------------------------------------- 1 | 110 | 111 | 112 |
113 |
Data Analysis and Export
114 | 115 | 124 | 125 |
126 | 127 | 128 | 129 | 130 | 131 | {#if useVariantFilter} 132 | 133 | {/if} 134 |
135 | 136 | 137 |
138 | 139 | 140 | 141 |
142 | 143 | 144 |
145 | 146 | -------------------------------------------------------------------------------- /frontend/src/components/modals/DataSummary.svelte: -------------------------------------------------------------------------------- 1 | 15 | 16 |
17 |
Data Summary
18 | 19 | 20 | 21 | 22 | 23 | 24 | {#if controller.metadata.dataset_descriptions.vcf_doi != null && controller.metadata.dataset_descriptions.vcf_doi !== ""} 25 | 26 | 27 | 28 | 29 | {/if} 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 55 | 56 | {#if controller.metadata.gff3.has_gff3 === true} 57 | 58 | 59 | 60 | 61 | {/if} 62 | 63 | 64 | 67 | 68 |
General description:{controller.metadata.dataset_descriptions.general_description}
DOI/URL of VCF:{controller.metadata.dataset_descriptions.vcf_doi}
DOI/URL of reference genome:{controller.metadata.dataset_descriptions.vcf_reference_genome_doi}
DOI/URL of genome annotation:{controller.metadata.dataset_descriptions.gff3_doi}
Number of genotypes:{controller.metadata.count_genotypes}
Number of variants:{controller.metadata.count_variants}
Number of variants per chromosome: 49 |
50 | {#each controller.metadata.chromosomes as chrom} 51 | {chrom.label}: {chrom.number_of_variants}
52 | {/each} 53 |
54 |
Number of genes provided by genome annotation:{controller.metadata.gff3.count_genes}
Genotypes list: 65 |
{@html controller.metadata.samples.map(x => sampleDisplayName(x) ).join('
')}
66 |
69 |
70 | 71 | -------------------------------------------------------------------------------- /frontend/src/components/modals/DummyModal.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 |
11 |

Dummy Modal Content

12 | 13 | 14 |
15 | 16 | 17 |
18 | {test} 19 |
20 | 21 | -------------------------------------------------------------------------------- /frontend/src/components/modals/GeneDetails.svelte: -------------------------------------------------------------------------------- 1 | 58 | 59 | 60 |
61 |
62 | Gene Details 63 | {#if isGeneBookmarked} 64 | 65 | {/if} 66 |
67 | 68 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | {#if controller.metadata.gff3.key_confidence !== undefined && controller.metadata.gff3.key_confidence !== false} 105 | 106 | 107 | 108 | 109 | {/if} 110 | 111 | {#if controller.metadata.gff3.count_exon_variants !== undefined && controller.metadata.gff3.count_exon_variants === true} 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | {/if} 121 |
ID{result.ID}
Type{result.type}
Chromosome{result.seqid}
Position{result.start} - {result.end}
Strand{result.strand}
Description{result.description}
Primary confidence class{result.primary_confidence_class}
Variants on whole gene{result.number_of_variants}
Variants on exons of this gene{result.number_of_exon_variants}
122 | 123 | {#if externalLinks.length > 0} 124 | 132 | {/if} 133 | 134 | {#if ontologyLinks.length > 0} 135 | 136 | 144 | {/if} 145 | 146 | 147 |
148 | {#if isGeneBookmarked} 149 | 150 | {:else} 151 | 152 | {/if} 153 |
154 | 155 |
156 | 157 | -------------------------------------------------------------------------------- /frontend/src/components/modals/GeneSearch.svelte: -------------------------------------------------------------------------------- 1 | 130 | 131 |
132 | 133 |
Gene Search
134 | 135 |
136 | 137 |
138 | 139 |
140 | 141 | 142 | 143 |
144 | 145 | {#if showLoadingAnimation} 146 |
147 | 148 |
149 | {/if} 150 |
151 | 152 |
153 | doSearch()}> 154 | 155 |
156 | 157 | {#if $store.searchInInterval} 158 |
159 | 160 | 165 | 166 | 167 | 168 | 169 |
170 | {/if} 171 | 172 |
173 | 174 |

175 | {#if $store.query.length > 0 && resultRowCount !== null && showGenesFoundText} 176 | {resultRowCount} genes were found that match your search query. 177 | {/if}   178 |

179 | 180 | 181 | {#if result !== null} 182 | 183 |
184 | 185 | 186 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 210 |
211 | 212 | {/if} 213 | 214 |
215 | 216 | -------------------------------------------------------------------------------- /frontend/src/components/modals/GeneSearchModalTable.svelte: -------------------------------------------------------------------------------- 1 | 185 | 186 | 187 | 188 | 189 | {#each $headerRows as headerRow (headerRow.id)} 190 | 191 | 192 | {#each headerRow.cells as cell (cell.id)} 193 | 194 | 197 | 198 | {/each} 199 | 200 | 201 | {/each} 202 | 203 | 204 | {#each $pageRows as row (row.id)} 205 | 206 | 207 | {#each row.cells as cell (cell.id)} 208 | 209 | 220 | 221 | {/each} 222 | 223 | 224 | {/each} 225 | 226 |
195 | {@html cell.render()} 196 |
210 | {#if cell.id == 'ID'} 211 | 212 | {:else if cell.id == 'description'} 213 | 214 | {:else if cell.id == '__view'} 215 | goToPos(cell.value.seqid, cell.value.start, cell.value.end) }>view 216 | {:else} 217 | 218 | {/if} 219 |
227 | 228 |
229 | 230 | Page {$pageIndex + 1} out of {$pageCount} 231 | 232 |
233 | 234 | 235 | -------------------------------------------------------------------------------- /frontend/src/components/modals/GeneSearchModalTableFast.svelte: -------------------------------------------------------------------------------- 1 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | {#if controller.metadata.gff3.count_exon_variants !== undefined && controller.metadata.gff3.count_exon_variants === true} 68 | 69 | {/if} 70 | 71 | 72 | 73 | 74 | {#each dataSlice as row} 75 | 76 | 79 | 80 | 83 | 84 | 85 | 86 | 87 | {#if controller.metadata.gff3.count_exon_variants !== undefined && controller.metadata.gff3.count_exon_variants === true} 88 | 89 | {/if} 90 | 93 | 94 | {/each} 95 | 96 |
IDTypeDescriptionChromosomeStart positionEnd positionPrimary
confidence
class
Number of
variants (on exons)
77 | {row.ID} 78 | {row.type} 81 | {row.description} 82 | {row.seqid}{row.start}{row.end}{row.primary_confidence_class}{row.number_of_variants} ({row.number_of_exon_variants}) 91 | goToPos(row.seqid, row.start, row.end) }>view 92 |
97 | 98 |
99 | 100 |
101 | 102 |
103 | 104 |
105 | 106 | Page {$pagination.currentPage + 1} out of {$pagination.pages} 107 | 108 |
109 |
110 | 111 | 112 | -------------------------------------------------------------------------------- /frontend/src/components/modals/GeneSearchModalTable__backup.svelte: -------------------------------------------------------------------------------- 1 | 128 | 129 | 130 | 131 | 132 | {#each $headerRows as headerRow (headerRow.id)} 133 | 134 | 135 | {#each headerRow.cells as cell (cell.id)} 136 | 137 | 140 | 141 | {/each} 142 | 143 | 144 | {/each} 145 | 146 | 147 | {#each $pageRows as row (row.id)} 148 | 149 | 150 | {#each row.cells as cell (cell.id)} 151 | 152 | 163 | 164 | {/each} 165 | 166 | 167 | {/each} 168 | 169 |
138 | {@html cell.render()} 139 |
153 | {#if cell.id == 'ID'} 154 | 155 | {:else if cell.id == 'description'} 156 | 157 | {:else if cell.id == '__view'} 158 | goToPos(cell.value.seqid, cell.value.start, cell.value.end) }>view 159 | {:else} 160 | 161 | {/if} 162 |
170 | 171 |
172 | 173 | Page {$pageIndex + 1} out of {$pageCount} 174 | 175 |
176 | 177 | 178 | -------------------------------------------------------------------------------- /frontend/src/components/modals/Settings.svelte: -------------------------------------------------------------------------------- 1 | 18 | 19 |
20 |
Settings
21 | 22 |
23 | 24 | 28 |
29 | 30 |
31 | 32 | 33 |
34 | 35 |
36 | 37 | -------------------------------------------------------------------------------- /frontend/src/components/modals/SnpEffAnnotation.svelte: -------------------------------------------------------------------------------- 1 | 45 | 46 | 47 |
48 |
SnpEff annotation
49 | 50 | Position: {position}

51 | 52 |
53 | 54 | 55 | {#each snpeffAnnAttrs as attr} 56 | 57 | {/each} 58 | 59 | 60 | {#each lines as line} 61 | 62 | {#each line as attr_entry} 63 | 64 | {/each} 65 | 66 | {/each} 67 | 68 |
{attr}
{attr_entry}
69 |
70 | 71 |
72 | 73 | -------------------------------------------------------------------------------- /frontend/src/components/modals/SortSamples.svelte: -------------------------------------------------------------------------------- 1 | 29 | 30 | 31 |
32 |
Sort Samples
33 | 34 |
35 |
Sort mode
36 |
37 | 38 | 39 |
40 |
41 | 42 | 43 |
44 |
45 | 46 | 47 |
48 |
49 | 50 | {#if $sortSettings.sortmode !== 'none'} 51 |
52 |
Sort order
53 |
54 | 55 | 56 |
57 |
58 | 59 | 60 |
61 |
62 | {/if} 63 | 64 |
65 | 66 |
67 | 68 | -------------------------------------------------------------------------------- /frontend/src/components/modals/VariantFilter.svelte: -------------------------------------------------------------------------------- 1 | 8 | 9 | 10 |
11 |
Filter Variants
12 | 13 |
14 |
15 | 16 | 17 |
18 | {#if $variantFilterSettings.filterByMaf} 19 |
20 | 21 |
22 |
23 | Minor allele frequency (MAF) must be between {Math.round($variantFilterSettings.maf[0] * 100)}% and {Math.round($variantFilterSettings.maf[1] * 100)}% 24 |
25 | {/if} 26 |
27 | 28 |
29 |
30 | 31 | 32 |
33 | {#if $variantFilterSettings.filterByMissingFreq} 34 |
35 | 36 |
37 |
38 | Missing rate must be between {Math.round($variantFilterSettings.missingFreq[0] * 100)}% and {Math.round($variantFilterSettings.missingFreq[1] * 100)}% 39 |
40 | {/if} 41 |
42 | 43 | 44 |
45 |
46 | 47 | 48 |
49 | {#if $variantFilterSettings.filterByHeteroFreq} 50 |
51 | 52 |
53 |
54 | Heterozygosity frequency must be between {Math.round($variantFilterSettings.heteroFreq[0] * 100)}% and {Math.round($variantFilterSettings.heteroFreq[1] * 100)}% 55 |
56 | {/if} 57 |
58 | 59 | 60 |
61 |
62 | 63 | 64 |
65 | {#if $variantFilterSettings.filterByVcfQual} 66 |
67 | 68 |
69 |
70 | QUAL value must be between {$variantFilterSettings.vcfQual[0]} and {$variantFilterSettings.vcfQual[1]} 71 |
72 | {/if} 73 |
74 | 75 | 76 |
77 | 78 |
79 | 80 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/ChromosomeMinimap.svelte: -------------------------------------------------------------------------------- 1 | 67 | 68 | 69 |
70 | 71 |
Position on chromosome
72 | 73 |
74 | 75 | 76 |
77 | 78 | {#if currChromCentromerePos > 0} 79 |
80 |
81 | {:else} 82 |
83 | {/if} 84 | 85 |
86 | 87 |
showing {data.variants_coordinates.length} of {prettyPos(currChromEnd)} variants in the range {prettyPos(data.coordinate_first)} - {prettyPos(data.coordinate_last)}
88 | 89 |
90 | 91 |
92 | 93 |
94 | 95 | 96 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/GenomicRegionGrid.svelte: -------------------------------------------------------------------------------- 1 | 44 | 45 | 46 | 47 | {#each gridPositionsY as position} 48 | {position.toLocaleString()} 49 | 50 | {/each} 51 | 52 | 53 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/HeterozygousCallsFrequencyHeatmap.svelte: -------------------------------------------------------------------------------- 1 | 46 | 47 | 48 |
Heterozygosity indicator
49 | {#each hets as het, i} 50 |   51 | {/each} 52 |
53 | 54 | 55 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/MeanPairwiseDifferenceHeatmap.svelte: -------------------------------------------------------------------------------- 1 | 40 | 41 | 42 |
Pairwise diff. indicator
43 | {#each hets as het, i} 44 |   45 | {/each} 46 |
47 | 48 | 49 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/MinorAlleleFrequencyHeatmap.svelte: -------------------------------------------------------------------------------- 1 | 45 | 46 | 47 |
MAF indicator
48 | {#each mafs as maf, i} 49 |   50 | {/each} 51 |
52 | 53 | 54 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/Positions.svelte: -------------------------------------------------------------------------------- 1 | 28 | 29 | 30 |
Variant positions
31 | {#each data.variants_coordinates as position} 32 | {position} 33 | {/each} 34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/Reference.svelte: -------------------------------------------------------------------------------- 1 | 61 | 62 |
Reference allele
63 | {#each reference as nucleotide, i} 64 | {#if nucleotide.length == 1} 65 | 66 | {:else} 67 | 68 | {/if} 69 | {/each} 70 |
71 | 72 | 73 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/SampleVariants.svelte: -------------------------------------------------------------------------------- 1 | 62 | 63 |
64 |
65 | 66 | {#if sampleData.status == 'single'} 67 |
{@html sampleDisplayName(sampleId, sampleData)}
68 | {:else if sampleData.status == 'group-root'} 69 |
{sampleId}
70 | {:else} 71 |
72 | 73 | {#if sampleData.isLastNode === true} 74 | 75 | {:else} 76 | 77 | {/if} 78 | 79 | 80 | {sampleId} 81 |
82 | {/if} 83 | 84 | {#if data.filtered_variants_coordinates.length > 0} 85 | {#each data.variants_coordinates as variant_coordinate, i} 86 | 87 | {#if data.calls.get(sampleId) !== null} 88 | {#if ploidy === 1} 89 | 90 | {:else if ploidy === 2} 91 | 92 | {/if} 93 | {/if} 94 | 95 | {/each} 96 | {/if} 97 | 98 |
99 | 100 |
101 | 102 | 103 | 104 |
105 | 106 | 107 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/SampleVariantsMinimap.svelte: -------------------------------------------------------------------------------- 1 | 234 | 235 |
236 |
Compressed view
237 |
canvasOnScrollDebounced(event.target.scrollTop) } style="max-height: 399px; overflow-y: scroll; flex-grow: 1; margin-top: 1px;"> 238 | 239 |
240 |
241 | 242 | -------------------------------------------------------------------------------- /frontend/src/components/tracks/SnpEff.svelte: -------------------------------------------------------------------------------- 1 | 80 | 81 | 82 |
SnpEff annotations
83 | {#if data.snpeff_variants !== undefined} 84 | {#each data.variants_coordinates as position} 85 | 86 | 87 | 88 | 89 | 90 | {/each} 91 | {/if} 92 |
93 | 94 | 95 | -------------------------------------------------------------------------------- /frontend/src/components/utils/LoadingAnimation.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 |
7 | 8 | 9 | -------------------------------------------------------------------------------- /frontend/src/lib/DataLoader.js: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | import cloneDeep from 'lodash/cloneDeep'; 3 | 4 | export default class DataLoader { 5 | 6 | constructor(config, eventbus) { 7 | this.config = config; 8 | this.eventbus = eventbus; 9 | 10 | this.payload = {}; 11 | this.data = {}; 12 | this.samplesVisibleInViewport = []; 13 | } 14 | 15 | generateVariantCallsMap() { 16 | this.callsMap = new Map(this.config.samples.map(item => [item, null])); 17 | } 18 | 19 | mergeLazyLoadedVariantCalls(calls) { 20 | let lazyLoadedCallsMap = new Map(Object.entries(calls)); 21 | this.callsMap = new Map([...this.callsMap, ...lazyLoadedCallsMap]); 22 | } 23 | 24 | mergeLazyLoadedVariantCallsMetadata(calls_metadata) { 25 | for (let [metadata_key, metadata_values] of Object.entries(calls_metadata)) { 26 | if (this.callsMetadataMaps[metadata_key] === undefined) { 27 | // create new map for this metadata-key 28 | this.callsMetadataMaps[metadata_key] = new Map(this.config.samples.map(item => [item, null])); 29 | } 30 | 31 | // merge data 32 | let metadataMap = new Map(Object.entries(metadata_values)); 33 | this.callsMetadataMaps[metadata_key] = new Map([...this.callsMetadataMaps[metadata_key], ...metadataMap]); 34 | } 35 | } 36 | 37 | checkIfSamplesAlreadyLazyLoaded(sampleIds) { 38 | let samplesToLazyLoad = []; 39 | for (let sampleId of sampleIds) { 40 | if (this.callsMap.get(sampleId) === null) { 41 | samplesToLazyLoad.push(sampleId); 42 | } 43 | } 44 | return samplesToLazyLoad; 45 | } 46 | 47 | lazyLoadChunkOfVariantCalls(number) { 48 | 49 | } 50 | 51 | lazyLoadVariantCalls(sampleIds, callbackEarlyExit) { 52 | 53 | this.samplesVisibleInViewport = sampleIds; 54 | 55 | sampleIds = this.checkIfSamplesAlreadyLazyLoaded(sampleIds); 56 | 57 | if (sampleIds.length == 0) { 58 | if (typeof callbackEarlyExit === "function") { 59 | callbackEarlyExit(); 60 | } 61 | return false; 62 | } 63 | 64 | this.eventbus.emit('loading:animation', {status: true}); 65 | 66 | this.payload['samples'] = sampleIds; 67 | 68 | this.loadVariantCalls(this.payload, _data => { 69 | this.mergeLazyLoadedVariantCalls(_data.calls); 70 | this.mergeLazyLoadedVariantCallsMetadata(_data.calls_metadata); 71 | 72 | this.data['__lazyLoaded'] = true; 73 | this.data['calls'] = this.callsMap; 74 | this.data['calls_metadata'] = this.callsMetadataMaps; 75 | 76 | this.eventbus.emit('loading:animation', {status: false}); 77 | this.eventbus.emit('data:display:changed', this.data); 78 | }); 79 | } 80 | 81 | loadVariantsAndCalls(payload, callback) { 82 | 83 | this.payload = payload; 84 | this.generateVariantCallsMap(); 85 | 86 | // init/reset some state vars 87 | this.data = {}; 88 | this.callsMetadataMaps = {}; 89 | 90 | let payloadVariantCalls = cloneDeep(payload); 91 | payloadVariantCalls['samples'] = payloadVariantCalls['samples'].slice(0,30); 92 | 93 | //console.info(this.samplesVisibleInViewport); 94 | 95 | if (this.samplesVisibleInViewport.length > 0) { 96 | payloadVariantCalls['samples'] = this.samplesVisibleInViewport; 97 | } 98 | 99 | const requestVariants = axios.post(this.config.apiBaseUrl+'/variants', payload); 100 | const requestVariantCalls = axios.post(this.config.apiBaseUrl+'/variant_calls', payloadVariantCalls); 101 | 102 | axios.all([requestVariants, requestVariantCalls]).then( 103 | axios.spread(({data:variants}, {data:variant_calls}) => { 104 | 105 | this.mergeLazyLoadedVariantCalls(variant_calls.calls); 106 | this.mergeLazyLoadedVariantCallsMetadata(variant_calls.calls_metadata); 107 | 108 | this.data = variants; 109 | this.data['calls'] = this.callsMap; 110 | this.data['calls_metadata'] = this.callsMetadataMaps; 111 | 112 | callback(this.data); 113 | }) 114 | ); 115 | } 116 | 117 | 118 | loadVariantCalls(payload, callback) { 119 | 120 | let endpoint = this.config.apiBaseUrl+'/variant_calls'; 121 | axios.post(endpoint, payload).then((response) => { 122 | callback(response.data); 123 | }) 124 | .catch(error => { 125 | console.log(error); 126 | //self.raiseError('Error: Could not load any data from the server / backend.') 127 | //this.eventbus.emit('loading:animation', {status: false}); 128 | }); 129 | } 130 | 131 | } -------------------------------------------------------------------------------- /frontend/src/main.js: -------------------------------------------------------------------------------- 1 | import App from './App.svelte'; 2 | 3 | const useShadowDOM = true; 4 | 5 | function startApp(containerId, config) { 6 | 7 | let _containerId = '#'+containerId; 8 | let container = document.querySelector(_containerId); 9 | let target; 10 | 11 | if (useShadowDOM) { 12 | target = container.attachShadow({ mode: "open" }); 13 | } else { 14 | target = container; 15 | } 16 | 17 | //let _config = JSON.parse(JSON.stringify(config)); 18 | let _config = Object.assign({}, config); 19 | 20 | let app = new App({ 21 | target: target, 22 | props: { 23 | config: _config, 24 | appId: 'divbrowse-'+containerId, 25 | rootElem: target 26 | } 27 | }); 28 | return app; 29 | } 30 | 31 | window.divbrowse = { 32 | 'startApp': startApp 33 | } -------------------------------------------------------------------------------- /frontend/src/utils/eventbus.js: -------------------------------------------------------------------------------- 1 | import cloneDeep from 'lodash/cloneDeep'; 2 | 3 | export default class EventEmitter { 4 | constructor() { 5 | this.events = {}; 6 | } 7 | 8 | on(eventName, callback) { 9 | if(!this.events[eventName] ) { 10 | this.events[eventName] = []; 11 | } 12 | this.events[eventName].push(callback); 13 | } 14 | 15 | emit(eventName, data) { 16 | //console.log("%c+++ Event: "+eventName, "color: blue;"); 17 | //console.log(data); 18 | const event = this.events[eventName]; 19 | if(event) { 20 | event.forEach(callback => { 21 | callback.call(null, cloneDeep(data)); 22 | }); 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /frontend/src/utils/helpers.js: -------------------------------------------------------------------------------- 1 | const debounce = (callback, wait) => { 2 | let timeoutId = null; 3 | return (...args) => { 4 | window.clearTimeout(timeoutId); 5 | timeoutId = window.setTimeout(() => { 6 | callback.apply(null, args); 7 | }, wait); 8 | }; 9 | } 10 | 11 | const isHetero = arr => arr.some(item => item !== arr[0]); 12 | 13 | const numberOfAltAllelesFactory = { 14 | getFunction: function(_ploidy) { 15 | let ploidy = _ploidy; 16 | 17 | const numberOfAlternateAlleles = (calls) => { 18 | if (ploidy == 2) { 19 | let variantType = 2; 20 | if (calls[0] == -1) { 21 | variantType = -1; 22 | } else if (isHetero(calls)) { 23 | variantType = 1; 24 | } else if (calls.reduce((a, b) => a + b, 0) === 0) { 25 | variantType = 0; 26 | } 27 | return variantType; 28 | } 29 | 30 | if (ploidy == 1) { 31 | let variantType = 2; 32 | if (calls === 0) { 33 | variantType = 0; 34 | } 35 | return variantType; 36 | } 37 | } 38 | 39 | return numberOfAlternateAlleles; 40 | } 41 | } 42 | 43 | export { debounce, numberOfAltAllelesFactory }; -------------------------------------------------------------------------------- /frontend/src/utils/logging.js: -------------------------------------------------------------------------------- 1 | const log = (msg) => { 2 | if (import.meta.env.MODE === 'development') { 3 | console.log(msg); 4 | } 5 | } 6 | 7 | export { log }; -------------------------------------------------------------------------------- /frontend/src/utils/store.js: -------------------------------------------------------------------------------- 1 | import { setContext, getContext } from "svelte"; 2 | import { writable } from "svelte/store"; 3 | 4 | let appId; 5 | 6 | const key = "__stores"; 7 | 8 | 9 | function createGenesBookmarksStore() { 10 | 11 | const localStorageKey = appId+'-genes-bookmarks'; 12 | 13 | const store = writable(new Set()); 14 | 15 | const bookmarkedGenes = localStorage.getItem(localStorageKey); 16 | if (bookmarkedGenes !== null) { 17 | const bookmarkedGenesSet = new Set(JSON.parse(bookmarkedGenes)); 18 | store.set(bookmarkedGenesSet); 19 | } 20 | 21 | const bookmarkGene = (id) => { 22 | store.update($ => { 23 | $.add(id); 24 | localStorage.setItem(localStorageKey, JSON.stringify(Array.from($))); 25 | return $; 26 | }); 27 | } 28 | 29 | const unbookmarkGene = (id) => { 30 | store.update($ => { 31 | $.delete(id); 32 | localStorage.setItem(localStorageKey, JSON.stringify(Array.from($))); 33 | return $; 34 | }); 35 | } 36 | 37 | return { 38 | ...store, 39 | bookmarkGene, 40 | unbookmarkGene 41 | } 42 | } 43 | 44 | 45 | function createStores() { 46 | 47 | const stores = { 48 | settings: writable({ 49 | statusShowMinimap: false, 50 | zoomX: false, 51 | zoomY: false, 52 | statusColorblindMode: false, 53 | variantDisplayMode: 'reference_mismatch' 54 | }), 55 | variantWidth: writable(20), 56 | groups: writable({}), 57 | snpPosHighlights: writable({}), 58 | sortSettings: writable({ 59 | sortmode: 'none', 60 | sortorder: undefined 61 | }), 62 | variantFilterSettings: writable({ 63 | maf: [0.05,0.5], 64 | missingFreq: [0,0.1], 65 | heteroFreq: [0,0.1], 66 | vcfQual: [500,1000] 67 | }), 68 | filteredVariantsCoordinates: writable([]), 69 | geneSearch: writable({ 70 | query: '', 71 | searchInInterval: false, 72 | selectedChromosome: undefined, 73 | startpos: null, 74 | endpos: null 75 | }), 76 | genesBookmarks: createGenesBookmarksStore(), 77 | } 78 | 79 | setContext(key, stores); 80 | 81 | return stores; 82 | } 83 | 84 | export default function getStores() { 85 | 86 | appId = getContext('app').app().appId; 87 | 88 | const stores = getContext(key); 89 | 90 | if (!stores) { 91 | return createStores(); 92 | } 93 | 94 | return stores; 95 | } -------------------------------------------------------------------------------- /frontend/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | -------------------------------------------------------------------------------- /frontend/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import { svelte } from '@sveltejs/vite-plugin-svelte' 3 | import sveltePreprocess from 'svelte-preprocess'; 4 | import { resolve } from 'path'; 5 | import replace from '@rollup/plugin-replace'; 6 | 7 | // https://vitejs.dev/config/ 8 | export default defineConfig({ 9 | build: { 10 | sourcemap: true, 11 | lib: { 12 | entry: resolve(__dirname, 'src/main.js'), 13 | name: 'divbrowse', 14 | fileName: 'divbrowse', 15 | formats: ['es'], 16 | }, 17 | }, 18 | resolve: { 19 | alias: { 20 | '@': resolve(__dirname, 'src'), 21 | //'#root': resolve(__dirname, 'src'), 22 | } 23 | }, 24 | plugins: [ 25 | replace({ 26 | 'process.env.NODE_ENV': JSON.stringify('production') 27 | }), 28 | svelte({ 29 | preprocess: [sveltePreprocess({ typescript: true })], 30 | emitCss: false 31 | }) 32 | ], 33 | }) 34 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "divbrowse" 3 | version = "1.1.0" 4 | keywords = ["bioinformatics", "genomics", "visualization", "variant call format", "vcf", "biodiversity"] 5 | description = "A web application for interactive visualization and analysis of genotypic variant matrices" 6 | readme = "README.md" 7 | authors = ["Patrick König "] 8 | license = "MIT" 9 | include = ["docs/", "divbrowse/static/*", "divbrowse/static/build/*"] 10 | homepage = "https://divbrowse.ipk-gatersleben.de/" 11 | repository = "https://github.com/IPK-BIT/divbrowse" 12 | documentation = "https://divbrowse.readthedocs.io/" 13 | 14 | [tool.poetry.dependencies] 15 | python = "^3.9" 16 | click = "^8.0.1" 17 | numpy = "^1.21.1" 18 | pandas = "^1.3.0" 19 | flask = "^2.0.1" 20 | #scikit-learn = "^0.24.2" 21 | scikit-learn = "^1.2.0" 22 | scikit-allel = "^1.3.5" 23 | seaborn = "^0.12.2" 24 | bioblend = "^0.16.0" 25 | pyyaml = "^5.4.1" 26 | zarr = "^2.8.3" 27 | tables = "^3.6.1" 28 | simplejson = "^3.17.3" 29 | orjson = "^3.8.5" 30 | umap-learn = "^0.5.2" 31 | waitress = "2.1.2" 32 | sphinx = {version = "^4.0.2", optional = true} 33 | sphinx-autoapi = {version = "^1.6.0", optional = true} 34 | sphinx_rtd_theme = {version = "^0.5.2", optional = true} 35 | sphinx-click = {version = "^3.0.1", optional = true} 36 | 37 | [tool.poetry.dev-dependencies] 38 | sphinx = {version = "^4.0.2"} 39 | sphinx-autoapi = {version = "^1.6.0"} 40 | sphinx_rtd_theme = {version = "^0.5.2"} 41 | sphinx-click = {version = "^3.0.1"} 42 | 43 | [tool.poetry.extras] 44 | docs = ["sphinx", "sphinx-autoapi", "sphinx_rtd_theme", "sphinx-click"] 45 | 46 | [tool.poetry.scripts] 47 | divbrowse = 'divbrowse.cli:main' 48 | 49 | [build-system] 50 | requires = ["poetry-core>=1.0.0"] 51 | build-backend = "poetry.core.masonry.api" 52 | --------------------------------------------------------------------------------