├── scripts ├── core │ ├── __init__.py │ └── wilcoxontest.py ├── outlier │ ├── __init__.py │ └── motifworkflow.py ├── tests │ ├── __init__.py │ ├── context.py │ └── test_wilcoxon.py ├── casecontrol │ ├── __init__.py │ └── motifworkflow.py ├── requirements.txt └── .gitignore ├── .gitignore ├── examples ├── outlier │ ├── reference.fasta.fai │ ├── str-profiles │ │ ├── sample10.motif.tsv │ │ ├── sample11.motif.tsv │ │ ├── sample12.motif.tsv │ │ ├── sample13.motif.tsv │ │ ├── sample14.motif.tsv │ │ ├── sample15.motif.tsv │ │ ├── sample16.motif.tsv │ │ ├── sample17.motif.tsv │ │ ├── sample18.motif.tsv │ │ ├── sample19.motif.tsv │ │ ├── sample2.motif.tsv │ │ ├── sample20.motif.tsv │ │ ├── sample21.motif.tsv │ │ ├── sample3.motif.tsv │ │ ├── sample4.motif.tsv │ │ ├── sample5.motif.tsv │ │ ├── sample6.motif.tsv │ │ ├── sample7.motif.tsv │ │ ├── sample8.motif.tsv │ │ ├── sample9.motif.tsv │ │ ├── sample1.motif.tsv │ │ ├── sample3.locus.tsv │ │ ├── sample12.locus.tsv │ │ ├── sample15.locus.tsv │ │ ├── sample3.str_profile.json │ │ ├── sample12.str_profile.json │ │ ├── sample15.str_profile.json │ │ ├── sample10.locus.tsv │ │ ├── sample13.locus.tsv │ │ ├── sample17.locus.tsv │ │ ├── sample18.locus.tsv │ │ ├── sample2.locus.tsv │ │ ├── sample20.locus.tsv │ │ ├── sample21.locus.tsv │ │ ├── sample4.locus.tsv │ │ ├── sample5.locus.tsv │ │ ├── sample6.locus.tsv │ │ ├── sample7.locus.tsv │ │ ├── sample8.locus.tsv │ │ ├── sample9.locus.tsv │ │ ├── sample1.locus.tsv │ │ ├── sample11.locus.tsv │ │ ├── sample14.locus.tsv │ │ ├── sample16.locus.tsv │ │ ├── sample19.locus.tsv │ │ ├── sample10.str_profile.json │ │ ├── sample13.str_profile.json │ │ ├── sample17.str_profile.json │ │ ├── sample18.str_profile.json │ │ ├── sample2.str_profile.json │ │ ├── sample20.str_profile.json │ │ ├── sample21.str_profile.json │ │ ├── sample4.str_profile.json │ │ ├── sample6.str_profile.json │ │ ├── sample7.str_profile.json │ │ ├── sample8.str_profile.json │ │ ├── sample9.str_profile.json │ │ ├── 
sample1.str_profile.json │ │ ├── sample11.str_profile.json │ │ ├── sample14.str_profile.json │ │ ├── sample16.str_profile.json │ │ ├── sample19.str_profile.json │ │ └── sample5.str_profile.json │ ├── example_dataset.outlier_motif.tsv │ ├── bamlets │ │ ├── sample1.bam │ │ ├── sample2.bam │ │ ├── sample3.bam │ │ ├── sample4.bam │ │ ├── sample5.bam │ │ ├── sample6.bam │ │ ├── sample7.bam │ │ ├── sample8.bam │ │ ├── sample9.bam │ │ ├── sample10.bam │ │ ├── sample11.bam │ │ ├── sample12.bam │ │ ├── sample13.bam │ │ ├── sample14.bam │ │ ├── sample15.bam │ │ ├── sample16.bam │ │ ├── sample17.bam │ │ ├── sample18.bam │ │ ├── sample19.bam │ │ ├── sample20.bam │ │ ├── sample21.bam │ │ ├── sample1.bam.bai │ │ ├── sample10.bam.bai │ │ ├── sample11.bam.bai │ │ ├── sample12.bam.bai │ │ ├── sample13.bam.bai │ │ ├── sample14.bam.bai │ │ ├── sample15.bam.bai │ │ ├── sample16.bam.bai │ │ ├── sample17.bam.bai │ │ ├── sample18.bam.bai │ │ ├── sample19.bam.bai │ │ ├── sample2.bam.bai │ │ ├── sample20.bam.bai │ │ ├── sample21.bam.bai │ │ ├── sample3.bam.bai │ │ ├── sample4.bam.bai │ │ ├── sample5.bam.bai │ │ ├── sample6.bam.bai │ │ ├── sample7.bam.bai │ │ ├── sample8.bam.bai │ │ └── sample9.bam.bai │ ├── example_dataset.outlier_locus.tsv │ ├── run-analysis.sh │ ├── manifest.tsv │ └── example_dataset.multisample_profile.json └── case-control │ ├── reference.fasta.fai │ ├── str-profiles │ ├── sample1.motif.tsv │ ├── sample4.motif.tsv │ ├── sample5.motif.tsv │ ├── sample6.motif.tsv │ ├── sample7.motif.tsv │ ├── sample2.motif.tsv │ ├── sample3.motif.tsv │ ├── sample6.locus.tsv │ ├── sample7.locus.tsv │ ├── sample1.locus.tsv │ ├── sample4.locus.tsv │ ├── sample5.locus.tsv │ ├── sample2.locus.tsv │ ├── sample3.locus.tsv │ ├── sample6.str_profile.json │ ├── sample7.str_profile.json │ ├── sample4.str_profile.json │ ├── sample1.str_profile.json │ ├── sample2.str_profile.json │ ├── sample3.str_profile.json │ └── sample5.str_profile.json │ ├── bamlets │ ├── sample1.bam │ ├── sample2.bam │ ├── 
sample3.bam │ ├── sample4.bam │ ├── sample5.bam │ ├── sample6.bam │ ├── sample7.bam │ ├── sample1.bam.bai │ ├── sample2.bam.bai │ ├── sample3.bam.bai │ ├── sample4.bam.bai │ ├── sample5.bam.bai │ ├── sample6.bam.bai │ └── sample7.bam.bai │ ├── example_dataset.casecontrol_motif.tsv │ ├── manifest.tsv │ ├── example_dataset.casecontrol_locus.tsv │ ├── run-analysis.sh │ └── example_dataset.multisample_profile.json ├── source ├── thirdparty │ ├── boost-cmake │ │ ├── .gitignore │ │ ├── libs │ │ │ ├── type_erasure.cmake │ │ │ ├── graph.cmake │ │ │ ├── timer.cmake │ │ │ ├── header.cmake │ │ │ ├── graph_parallel.cmake │ │ │ ├── system.cmake │ │ │ ├── chrono.cmake │ │ │ ├── atomic.cmake │ │ │ ├── thread.cmake │ │ │ ├── wave.cmake │ │ │ ├── coroutine.cmake │ │ │ ├── context │ │ │ │ ├── jump_combined.S │ │ │ │ ├── make_combined.S │ │ │ │ └── ontop_combined.S │ │ │ ├── regex.cmake │ │ │ ├── test.cmake │ │ │ ├── math.cmake │ │ │ ├── mpi.cmake │ │ │ ├── program_options.cmake │ │ │ ├── filesystem.cmake │ │ │ ├── context.cmake │ │ │ ├── exception.cmake │ │ │ └── serialization.cmake │ │ ├── cmake │ │ │ └── Modules │ │ │ │ ├── PlatformDetect.cmake │ │ │ │ ├── StandaloneBuild.cmake │ │ │ │ ├── CheckPreprocessor.cmake │ │ │ │ ├── CheckBoostVersion.cmake │ │ │ │ ├── ccache.cmake │ │ │ │ ├── AddBoostLib.cmake │ │ │ │ ├── AddBoostTest.cmake │ │ │ │ └── FindIconv.cmake │ │ ├── repack.sh │ │ ├── patch │ │ │ └── 1.63.0 │ │ │ │ ├── context_0002_macOS_execution_context.patch │ │ │ │ └── context_0001_arm64_cpu.patch │ │ ├── LICENSE.md │ │ ├── azure-pipelines.yml │ │ ├── README.md │ │ └── CMakeLists.txt │ ├── htslib-1.9.tar.bz2 │ └── spdlog │ │ ├── version.h │ │ ├── fmt │ │ ├── ostr.h │ │ ├── fmt.h │ │ └── bundled │ │ │ ├── LICENSE.rst │ │ │ └── locale.h │ │ ├── formatter.h │ │ ├── details │ │ ├── null_mutex.h │ │ ├── log_msg.h │ │ ├── console_globals.h │ │ ├── circular_q.h │ │ └── periodic_worker.h │ │ ├── sinks │ │ ├── msvc_sink.h │ │ ├── null_sink.h │ │ ├── ostream_sink.h │ │ ├── sink.h │ │ 
├── stdout_color_sinks.h │ │ ├── base_sink.h │ │ ├── basic_file_sink.h │ │ └── dist_sink.h │ │ └── async_logger.h ├── .clang-format ├── reads │ ├── unit_tests │ │ ├── CMakeLists.txt │ │ └── pair_collector_test.cc │ ├── CMakeLists.txt │ ├── Read.cpp │ ├── Read.hh │ ├── Purity.hh │ └── IrrFinder.hh ├── region │ ├── CMakeLists.txt │ └── ReferenceContigInfo.hh ├── merge │ ├── CMakeLists.txt │ ├── MergeWorkflow.hh │ ├── MultisampleProfile.hh │ ├── MultisampleProfile.cpp │ └── MergeParameters.hh ├── profile │ ├── CMakeLists.txt │ ├── ProfileWorkflow.hh │ ├── ReadClassification.hh │ ├── ReadClassification.cpp │ ├── SampleRunStats.hh │ └── ProfileParameters.hh ├── common │ ├── CMakeLists.txt │ ├── Interval.cpp │ ├── SequenceUtils.hh │ ├── Interval.hh │ ├── SequenceUtils.cpp │ └── Parameters.hh ├── .gitignore ├── tests │ ├── UnitTests.cpp │ └── SequenceUtilsTest.cpp ├── app │ ├── Version.hh │ └── GatherParameters.hh ├── io │ ├── HtsHelpers.hh │ ├── Reference.hh │ ├── CMakeLists.txt │ ├── HtsFileStreamer.hh │ ├── Reference.cpp │ └── HtsHelpers.cpp └── Makefile ├── documentation ├── images │ ├── workflow.png │ ├── str-profile.png │ ├── analysis-types.png │ ├── zscores-in-controls.png │ └── zscores-ranks-sizes.png ├── 02_Installation.md ├── 00_Introduction.md ├── 07_Case_control_analysis.md └── 01_Before_you_begin.md └── README.md /scripts/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/outlier/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/casecontrol/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | build 3 | .vscode 4 | .idea 5 | -------------------------------------------------------------------------------- /examples/outlier/reference.fasta.fai: -------------------------------------------------------------------------------- 1 | StrA 4006 6 4006 4007 2 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.1 2 | scipy==1.3.1 3 | -------------------------------------------------------------------------------- /examples/case-control/reference.fasta.fai: -------------------------------------------------------------------------------- 1 | StrA 4006 6 4006 4007 2 | StrB 4006 4020 4006 4007 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample10.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample11.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample12.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample13.motif.tsv: 
-------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample14.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample15.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample16.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample17.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample18.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample19.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample2.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | 
-------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample20.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample21.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample3.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample4.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample5.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample6.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample7.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample8.motif.tsv: 
-------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample9.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/.gitignore: -------------------------------------------------------------------------------- 1 | *.sw? 2 | *.tmp 3 | build*/ 4 | boost/ 5 | boost_*.tar.xz 6 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample1.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | AGC 5 3.79 3 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample1.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | CCG 39 30.94 3 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample4.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | CCG 1 0.79 3 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample5.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | CCG 4 3.19 3 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample6.motif.tsv: 
-------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | CCG 1 0.80 3 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample7.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | CCG 3 2.41 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample3.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample12.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample15.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample3.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "Depth": 37.74338492261607, 3 | "ReadLength": 150 4 | } 5 | -------------------------------------------------------------------------------- /documentation/images/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/documentation/images/workflow.png -------------------------------------------------------------------------------- 
/examples/case-control/str-profiles/sample2.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | AGC 2 1.56 3 | CCG 46 35.89 4 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample3.motif.tsv: -------------------------------------------------------------------------------- 1 | motif num_paired_irrs norm_num_paired_irrs 2 | AGC 1 0.78 3 | CCG 35 27.20 4 | -------------------------------------------------------------------------------- /examples/outlier/example_dataset.outlier_motif.tsv: -------------------------------------------------------------------------------- 1 | motif top_case_zscore high_case_counts counts 2 | AGC 1.46 sample1:5.05 5.05 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample12.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "Depth": 37.74338492261607, 3 | "ReadLength": 150 4 | } 5 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample15.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "Depth": 37.74338492261607, 3 | "ReadLength": 150 4 | } 5 | -------------------------------------------------------------------------------- /documentation/images/str-profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/documentation/images/str-profile.png -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample1.bam: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample1.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample2.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample2.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample3.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample3.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample4.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample4.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample5.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample5.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample6.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample6.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample7.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample7.bam 
-------------------------------------------------------------------------------- /examples/outlier/bamlets/sample8.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample8.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample9.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample9.bam -------------------------------------------------------------------------------- /source/thirdparty/htslib-1.9.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/source/thirdparty/htslib-1.9.tar.bz2 -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample10.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample10.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample11.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample11.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample12.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample12.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample13.bam: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample13.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample14.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample14.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample15.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample15.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample16.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample16.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample17.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample17.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample18.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample18.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample19.bam: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample19.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample20.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample20.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample21.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample21.bam -------------------------------------------------------------------------------- /documentation/images/analysis-types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/documentation/images/analysis-types.png -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample1.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample1.bam -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample2.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample2.bam -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample3.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample3.bam 
-------------------------------------------------------------------------------- /examples/case-control/bamlets/sample4.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample4.bam -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample5.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample5.bam -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample6.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample6.bam -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample7.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample7.bam -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample1.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample1.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample10.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample10.bam.bai -------------------------------------------------------------------------------- 
/examples/outlier/bamlets/sample11.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample11.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample12.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample12.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample13.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample13.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample14.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample14.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample15.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample15.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample16.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample16.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample17.bam.bai: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample17.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample18.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample18.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample19.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample19.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample2.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample2.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample20.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample20.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample21.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample21.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample3.bam.bai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample3.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample4.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample4.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample5.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample5.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample6.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample6.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample7.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample7.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample8.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample8.bam.bai -------------------------------------------------------------------------------- /examples/outlier/bamlets/sample9.bam.bai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/outlier/bamlets/sample9.bam.bai -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample10.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1780 1865 AGC 2 1.58 55 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample13.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1730 2136 AGC 8 6.31 71 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample17.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1793 1794 AGC 1 0.79 52 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample18.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1796 2001 AGC 3 2.37 57 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample2.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1793 1794 AGC 1 0.79 52 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample20.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 
2 | StrA 1843 2117 AGC 2 1.58 55 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample21.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1775 2001 AGC 4 3.15 60 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample4.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1918 2064 AGC 3 2.38 57 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample5.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1749 2106 AGC 12 9.46 81 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample6.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1814 2086 AGC 7 5.50 68 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample7.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1739 2090 AGC 15 11.78 89 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample8.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1752 2113 AGC 2 1.58 55 3 | 
-------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample9.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1751 2103 AGC 11 8.64 78 3 | -------------------------------------------------------------------------------- /documentation/images/zscores-in-controls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/documentation/images/zscores-in-controls.png -------------------------------------------------------------------------------- /documentation/images/zscores-ranks-sizes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/documentation/images/zscores-ranks-sizes.png -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample1.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample1.bam.bai -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample2.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample2.bam.bai -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample3.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample3.bam.bai 
-------------------------------------------------------------------------------- /examples/case-control/bamlets/sample4.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample4.bam.bai -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample5.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample5.bam.bai -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample6.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample6.bam.bai -------------------------------------------------------------------------------- /examples/case-control/bamlets/sample7.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illumina/ExpansionHunterDenovo/HEAD/examples/case-control/bamlets/sample7.bam.bai -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample1.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1702 2102 AGC 42 31.81 156 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample11.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1765 2159 AGC 15 11.83 89 3 | 
-------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample14.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1733 2157 AGC 13 10.25 84 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample16.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1779 2117 AGC 14 11.00 86 3 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample19.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1718 2116 AGC 20 15.65 102 3 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample6.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrB 1729 2076 CCG 20 15.99 103 3 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample7.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrB 1761 2135 CCG 19 15.27 100 3 | -------------------------------------------------------------------------------- /scripts/tests/context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath( 4 | os.path.join(os.path.dirname(__file__), '..'))) 5 | 6 | import region 7 | 
-------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample1.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1734 2051 AGC 8 6.35 71 3 | StrB 1684 2068 CCG 21 16.66 105 4 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample4.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1769 2067 AGC 5 3.97 63 3 | StrB 1797 2115 CCG 28 22.26 124 4 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample5.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1744 2123 AGC 9 7.18 73 3 | StrB 1695 2130 CCG 19 15.16 100 4 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample2.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1771 2053 AGC 32 24.96 133 3 | StrB 1757 2154 CCG 22 17.16 107 4 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample3.locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif num_anc_irrs norm_num_anc_irrs het_str_size 2 | StrA 1723 2104 AGC 29 22.54 125 3 | StrB 1783 2074 CCG 30 23.31 127 4 | -------------------------------------------------------------------------------- /source/.clang-format: -------------------------------------------------------------------------------- 1 | 
BasedOnStyle: WebKit 2 | ColumnLimit: 120 3 | AlignAfterOpenBracket: AlwaysBreak 4 | BreakBeforeBraces: Allman 5 | BreakStringLiterals: true 6 | ReflowComments: true 7 | CompactNamespaces: true 8 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/type_erasure.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME type_erasure 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/type_erasure/src/dynamic_binding.cpp 5 | LINK 6 | Boost::thread 7 | ) 8 | -------------------------------------------------------------------------------- /source/reads/unit_tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(pair_collector_test pair_collector_test.cc) 2 | target_link_libraries(pair_collector_test pair_collector gtest gmock_main) 3 | add_test(NAME pair_collector_test COMMAND pair_collector_test) -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/graph.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME graph 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/graph/src/graphml.cpp 5 | ${BOOST_SOURCE}/libs/graph/src/read_graphviz_new.cpp 6 | LINK Boost::regex 7 | ) 8 | -------------------------------------------------------------------------------- /examples/outlier/example_dataset.outlier_locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif top_case_zscore high_case_counts counts 2 | StrA 1702 2159 AGC 1.42 sample1:42.41 42.41,2.11,15.77,8.41,13.67,14.66,1.06,3.15,20.86,1.06,2.11,4.21,3.17,12.62,7.33,15.71,2.11,11.52 3 | -------------------------------------------------------------------------------- /source/region/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 
add_library(region STATIC 2 | GenomicRegion.hh GenomicRegion.cpp 3 | ReferenceContigInfo.hh ReferenceContigInfo.cpp) 4 | target_include_directories(region PUBLIC ${CMAKE_SOURCE_DIR}) 5 | target_link_libraries(region Boost::boost) 6 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/timer.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME timer 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/timer/src/auto_timers_construction.cpp 5 | ${BOOST_SOURCE}/libs/timer/src/cpu_timer.cpp 6 | DEFINE_PRIVATE 7 | BOOST_TIMER_STATIC_LINK=1 8 | LINK 9 | Boost::chrono 10 | ) 11 | -------------------------------------------------------------------------------- /examples/case-control/example_dataset.casecontrol_motif.tsv: -------------------------------------------------------------------------------- 1 | motif pvalue bonf_pvalue counts 2 | CCG 0.01694742676234462 0.01694742676234462 sample1:41.249900990099015,sample2:47.84836092177864,sample3:36.26511477529906,sample4:1.05978835978836,sample5:4.256042496679947,sample6:1.066134397870925,sample7:3.2144433299899697 3 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/header.cmake: -------------------------------------------------------------------------------- 1 | # Define the header-only Boost target 2 | add_library(Boost::boost INTERFACE IMPORTED GLOBAL) 3 | target_include_directories(Boost::boost SYSTEM INTERFACE ${BOOST_SOURCE}) 4 | 5 | # Disable autolink 6 | target_compile_definitions(Boost::boost INTERFACE BOOST_ALL_NO_LIB=1) 7 | -------------------------------------------------------------------------------- /source/reads/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(reads STATIC 2 | Read.hh Read.cpp 3 | ../profile/PairCollector.hh ../profile/PairCollector.cpp 4 | 
../profile/ReadClassification.hh ../profile/ReadClassification.cpp 5 | IrrFinder.hh IrrFinder.cpp 6 | Purity.hh Purity.cpp) 7 | target_link_libraries(reads common) 8 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/PlatformDetect.cmake: -------------------------------------------------------------------------------- 1 | # Basic platform detection 2 | include(CheckPreprocessor) 3 | check_preprocessor(USE_LINUX __linux__) 4 | check_preprocessor(USE_WINDOWS _WIN32) 5 | check_preprocessor(USE_APPLE __APPLE__) 6 | check_preprocessor(USE_ANDROID __ANDROID__) 7 | check_preprocessor(USE_FREEBSD __FreeBSD__) 8 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample10.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 2, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1780-1865": 2 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 37.89316025961058, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample13.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 8, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1730-2136": 8 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.04293559660509, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample17.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 1, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1793-1794": 1 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 
37.89316025961058, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample18.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 3, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1796-2001": 3 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.04293559660509, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample2.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 1, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1793-1794": 1 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 37.89316025961058, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample20.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 2, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1843-2117": 2 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 37.89316025961058, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample21.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 4, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1775-2001": 4 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.04293559660509, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample4.str_profile.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 3, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1918-2064": 3 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 37.89316025961058, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample6.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 7, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1814-2086": 7 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.1927109335996, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample7.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 15, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1739-2090": 15 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.1927109335996, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample8.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 2, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1752-2113": 2 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 37.89316025961058, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample9.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 11, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1751-2103": 
11 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.1927109335996, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample1.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 42, 4 | "IrrPairCount": 5, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1702-2102": 42 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 39.61557663504743, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample11.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 15, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1765-2159": 15 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.04293559660509, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample14.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 13, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1733-2157": 13 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.04293559660509, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample16.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 14, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1779-2117": 14 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.1927109335996, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- 
/examples/outlier/str-profiles/sample19.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 20, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1718-2116": 20 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.34248627059411, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/outlier/str-profiles/sample5.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 12, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1749-2106": 12 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "Depth": 38.04293559660509, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/graph_parallel.cmake: -------------------------------------------------------------------------------- 1 | if(NOT TARGET Boost_mpi) 2 | return() 3 | endif() 4 | 5 | _add_boost_lib( 6 | NAME graph_parallel 7 | SOURCES 8 | ${BOOST_SOURCE}/libs/graph_parallel/src/mpi_process_group.cpp 9 | ${BOOST_SOURCE}/libs/graph_parallel/src/tag_allocator.cpp 10 | LINK 11 | Boost::mpi 12 | ) 13 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample6.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "CCG": { 3 | "AnchoredIrrCount": 20, 4 | "IrrPairCount": 1, 5 | "RegionsWithIrrAnchors": { 6 | "StrB:1729-2076": 20 7 | }, 8 | "RepeatUnit": "CCG" 9 | }, 10 | "Depth": 37.51872191712431, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample7.str_profile.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "CCG": { 3 | "AnchoredIrrCount": 19, 4 | "IrrPairCount": 3, 5 | "RegionsWithIrrAnchors": { 6 | "StrB:1761-2135": 19 7 | }, 8 | "RepeatUnit": "CCG" 9 | }, 10 | "Depth": 37.33150274588118, 11 | "ReadLength": 150 12 | } 13 | -------------------------------------------------------------------------------- /source/merge/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(mergeworkflow STATIC 2 | MergeWorkflow.hh MergeWorkflow.cpp 3 | MergeParameters.hh MergeParameters.cpp 4 | MultisampleProfile.hh MultisampleProfile.cpp) 5 | 6 | target_link_libraries(mergeworkflow io Boost::filesystem region) 7 | target_include_directories(mergeworkflow PUBLIC ${CMAKE_SOURCE_DIR}) 8 | -------------------------------------------------------------------------------- /source/profile/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(profileworkflow STATIC 2 | ProfileWorkflow.hh ProfileWorkflow.cpp 3 | ProfileParameters.hh ProfileParameters.cpp 4 | SampleRunStats.hh SampleRunStats.cpp) 5 | 6 | target_link_libraries(profileworkflow io Boost::filesystem) 7 | target_include_directories(profileworkflow PUBLIC ${CMAKE_SOURCE_DIR}) 8 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/StandaloneBuild.cmake: -------------------------------------------------------------------------------- 1 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL "${CMAKE_SOURCE_DIR}") 2 | message(STATUS "Standalone mode detected") 3 | set(BOOST_STANDALONE ON) 4 | set(CMAKE_CXX_STANDARD 11) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | enable_testing() 8 | 9 | include(ccache) 10 | endif() 11 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/version.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #define SPDLOG_VER_MAJOR 1 9 | #define SPDLOG_VER_MINOR 3 10 | #define SPDLOG_VER_PATCH 1 11 | 12 | #define SPDLOG_VERSION (SPDLOG_VER_MAJOR * 10000 + SPDLOG_VER_MINOR * 100 + SPDLOG_VER_PATCH) 13 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/CheckPreprocessor.cmake: -------------------------------------------------------------------------------- 1 | include(CheckCXXSymbolExists) 2 | 3 | function(check_preprocessor output_variable symbol) 4 | set(CMAKE_REQUIRED_QUIET OFF) 5 | check_cxx_symbol_exists(${symbol} "" ${output_variable}) 6 | if(NOT ${output_variable}) 7 | set(${output_variable} 0 CACHE INTERNAL "Have symbol ${symbol}" FORCE) 8 | endif() 9 | endfunction() 10 | 11 | -------------------------------------------------------------------------------- /examples/case-control/manifest.tsv: -------------------------------------------------------------------------------- 1 | sample1 case str-profiles/sample1.str_profile.json 2 | sample2 case str-profiles/sample2.str_profile.json 3 | sample3 case str-profiles/sample3.str_profile.json 4 | sample4 control str-profiles/sample4.str_profile.json 5 | sample5 control str-profiles/sample5.str_profile.json 6 | sample6 control str-profiles/sample6.str_profile.json 7 | sample7 control str-profiles/sample7.str_profile.json 8 | -------------------------------------------------------------------------------- /source/common/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(common STATIC 2 | Parameters.hh 3 | SequenceUtils.hh SequenceUtils.cpp Interval.cpp Interval.hh) 4 | 5 | target_include_directories(common PUBLIC ${CMAKE_SOURCE_DIR}) 6 | 
target_link_libraries(common Boost::boost) 7 | 8 | #add_executable(SequenceUtilsTest SequenceUtilsTest.cpp) 9 | #target_link_libraries(SequenceUtilsTest common) 10 | #add_test(NAME SequenceUtilsTest COMMAND SequenceUtilsTest) 11 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/fmt/ostr.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's ostream support 9 | // 10 | #if !defined(SPDLOG_FMT_EXTERNAL) 11 | #ifndef FMT_HEADER_ONLY 12 | #define FMT_HEADER_ONLY 13 | #endif 14 | #include "bundled/ostream.h" 15 | #include "fmt.h" 16 | #else 17 | #include 18 | #endif 19 | -------------------------------------------------------------------------------- /source/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | 3 | # Prerequisites 4 | *.d 5 | 6 | # Compiled Object files 7 | *.slo 8 | *.lo 9 | *.o 10 | *.obj 11 | 12 | # Precompiled Headers 13 | *.gch 14 | *.pch 15 | 16 | # Compiled Dynamic libraries 17 | *.so 18 | *.dylib 19 | *.dll 20 | 21 | # Fortran module files 22 | *.mod 23 | *.smod 24 | 25 | # Compiled Static libraries 26 | *.lai 27 | *.la 28 | *.a 29 | *.lib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | 36 | cmake-build-debug/ 37 | .idea/ 38 | .DS_Store 39 | boost_1_67_0.tar.bz2 -------------------------------------------------------------------------------- /examples/case-control/example_dataset.casecontrol_locus.tsv: -------------------------------------------------------------------------------- 1 | contig start end motif pvalue bonf_pvalue counts 2 | StrA 1723 2123 AGC 0.03854993587177091 0.07709987174354183 
sample1:8.461518151815183,sample2:33.28581629341123,sample3:30.048237956676367,sample4:5.298941798941799,sample5:9.576095617529882 3 | StrB 1684 2154 CCG 0.07864960352514261 0.15729920705028522 sample1:22.211485148514853,sample2:22.88399870172022,sample3:31.08438409311348,sample4:29.674074074074078,sample5:20.21620185922975,sample6:21.3226879574185,sample7:20.358141089936474 4 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "fmt/fmt.h" 9 | #include "spdlog/details/log_msg.h" 10 | 11 | namespace spdlog { 12 | 13 | class formatter 14 | { 15 | public: 16 | virtual ~formatter() = default; 17 | virtual void format(const details::log_msg &msg, fmt::memory_buffer &dest) = 0; 18 | virtual std::unique_ptr clone() const = 0; 19 | }; 20 | } // namespace spdlog 21 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample4.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 5, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1769-2067": 5 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "CCG": { 11 | "AnchoredIrrCount": 28, 12 | "IrrPairCount": 1, 13 | "RegionsWithIrrAnchors": { 14 | "StrB:1797-2115": 28 15 | }, 16 | "RepeatUnit": "CCG" 17 | }, 18 | "Depth": 37.74338492261607, 19 | "ReadLength": 150 20 | } 21 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/CheckBoostVersion.cmake: -------------------------------------------------------------------------------- 1 | # Detect Boost version 2 | file(STRINGS 
"${BOOST_SOURCE}/boost/version.hpp" boost_version_raw 3 | REGEX "define BOOST_VERSION " 4 | ) 5 | string(REGEX MATCH "[0-9]+" boost_version_raw "${boost_version_raw}") 6 | math(EXPR BOOST_VERSION_MAJOR "${boost_version_raw} / 100000") 7 | math(EXPR BOOST_VERSION_MINOR "${boost_version_raw} / 100 % 1000") 8 | math(EXPR BOOST_VERSION_PATCH "${boost_version_raw} % 100") 9 | set(BOOST_VERSION "${BOOST_VERSION_MAJOR}.${BOOST_VERSION_MINOR}.${BOOST_VERSION_PATCH}") 10 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample1.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 8, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1734-2051": 8 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "CCG": { 11 | "AnchoredIrrCount": 21, 12 | "IrrPairCount": 39, 13 | "RegionsWithIrrAnchors": { 14 | "StrB:1684-2068": 21 15 | }, 16 | "RepeatUnit": "CCG" 17 | }, 18 | "Depth": 37.81827259111333, 19 | "ReadLength": 150 20 | } 21 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample2.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 32, 4 | "IrrPairCount": 2, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1771-2053": 32 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "CCG": { 11 | "AnchoredIrrCount": 22, 12 | "IrrPairCount": 46, 13 | "RegionsWithIrrAnchors": { 14 | "StrB:1757-2154": 22 15 | }, 16 | "RepeatUnit": "CCG" 17 | }, 18 | "Depth": 38.45481777333999, 19 | "ReadLength": 150 20 | } 21 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample3.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 29, 4 | 
"IrrPairCount": 1, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1723-2104": 29 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "CCG": { 11 | "AnchoredIrrCount": 30, 12 | "IrrPairCount": 35, 13 | "RegionsWithIrrAnchors": { 14 | "StrB:1783-2074": 30 15 | }, 16 | "RepeatUnit": "CCG" 17 | }, 18 | "Depth": 38.6045931103345, 19 | "ReadLength": 150 20 | } 21 | -------------------------------------------------------------------------------- /examples/case-control/str-profiles/sample5.str_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "AGC": { 3 | "AnchoredIrrCount": 9, 4 | "IrrPairCount": 0, 5 | "RegionsWithIrrAnchors": { 6 | "StrA:1744-2123": 9 7 | }, 8 | "RepeatUnit": "AGC" 9 | }, 10 | "CCG": { 11 | "AnchoredIrrCount": 19, 12 | "IrrPairCount": 4, 13 | "RegionsWithIrrAnchors": { 14 | "StrB:1695-2130": 19 15 | }, 16 | "RepeatUnit": "CCG" 17 | }, 18 | "Depth": 37.593609585621564, 19 | "ReadLength": 150 20 | } 21 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/ccache.cmake: -------------------------------------------------------------------------------- 1 | find_program(CCACHE_BIN ccache) 2 | if(CCACHE_BIN) 3 | set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_BIN}) 4 | set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_BIN}) 5 | 6 | # ccache uses -I when compiling without preprocessor, which makes clang complain. 
7 | if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 8 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments -fcolor-diagnostics") 9 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qunused-arguments -fcolor-diagnostics") 10 | endif() 11 | endif() 12 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/system.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME system 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/system/src/error_code.cpp 5 | DEFINE_PRIVATE 6 | BOOST_SYSTEM_STATIC_LINK=1 7 | ) 8 | 9 | _add_boost_test( 10 | NAME system_test 11 | LINK 12 | Boost::boost 13 | TESTS 14 | RUN ${BOOST_SOURCE}/libs/system/test/error_code_test.cpp 15 | RUN ${BOOST_SOURCE}/libs/system/test/error_code_user_test.cpp 16 | RUN ${BOOST_SOURCE}/libs/system/test/system_error_test.cpp 17 | RUN ${BOOST_SOURCE}/libs/system/test/header_only_test.cpp 18 | RUN ${BOOST_SOURCE}/libs/system/test/config_test.cpp 19 | ) 20 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/fmt/fmt.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016-2018 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // 9 | // Include a bundled header-only copy of fmtlib or an external one. 10 | // By default spdlog include its own copy. 
11 | // 12 | 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifndef FMT_HEADER_ONLY 15 | #define FMT_HEADER_ONLY 16 | #endif 17 | #ifndef FMT_USE_WINDOWS_H 18 | #define FMT_USE_WINDOWS_H 0 19 | #endif 20 | #include "bundled/core.h" 21 | #include "bundled/format.h" 22 | #else // external fmtlib 23 | #include 24 | #include 25 | #endif 26 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/chrono.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME chrono 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/chrono/src/chrono.cpp 5 | ${BOOST_SOURCE}/libs/chrono/src/process_cpu_clocks.cpp 6 | ${BOOST_SOURCE}/libs/chrono/src/thread_clock.cpp 7 | DEFINE_PRIVATE 8 | BOOST_CHRONO_STATIC_LINK=1 9 | BOOST_SYSTEM_NO_DEPRECATED 10 | ) 11 | if(NOT USE_WINDOWS) 12 | find_package(Threads) 13 | target_link_libraries(Boost_chrono PRIVATE Threads::Threads) 14 | 15 | find_library(RT_LIBRARY 16 | NAMES rt 17 | DOC "rt library" 18 | ) 19 | if(RT_LIBRARY) 20 | target_link_libraries(Boost_chrono PRIVATE ${RT_LIBRARY}) 21 | endif() 22 | endif() 23 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/atomic.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME atomic 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/atomic/src/lockpool.cpp 5 | DEFINE_PRIVATE 6 | BOOST_ATOMIC_STATIC_LINK=1 7 | BOOST_ATOMIC_SOURCE 8 | ) 9 | 10 | _add_boost_test( 11 | NAME atomic_test 12 | LINK 13 | Boost::atomic 14 | Boost::thread 15 | DEFINE 16 | # The tests expect APIs deprecated in Thread v4, so enable them 17 | BOOST_THREAD_PROVIDES_NESTED_LOCKS=1 18 | BOOST_THREAD_USES_DATETIME=1 19 | TESTS 20 | RUN ${BOOST_SOURCE}/libs/atomic/test/native_api.cpp 21 | RUN ${BOOST_SOURCE}/libs/atomic/test/fallback_api.cpp 22 | RUN ${BOOST_SOURCE}/libs/atomic/test/atomicity.cpp 23 | RUN 
${BOOST_SOURCE}/libs/atomic/test/ordering.cpp 24 | RUN ${BOOST_SOURCE}/libs/atomic/test/lockfree.cpp 25 | ) 26 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/thread.cmake: -------------------------------------------------------------------------------- 1 | if(USE_WINDOWS) 2 | set(THREAD_SRCS 3 | ${BOOST_SOURCE}/libs/thread/src/win32/thread.cpp 4 | ${BOOST_SOURCE}/libs/thread/src/win32/tss_pe.cpp 5 | ) 6 | else() 7 | set(THREAD_SRCS 8 | ${BOOST_SOURCE}/libs/thread/src/pthread/thread.cpp 9 | ${BOOST_SOURCE}/libs/thread/src/pthread/once.cpp 10 | ) 11 | endif() 12 | 13 | _add_boost_lib( 14 | NAME thread 15 | SOURCES 16 | ${THREAD_SRCS} 17 | ${BOOST_SOURCE}/libs/thread/src/future.cpp 18 | DEFINE_PRIVATE 19 | BOOST_THREAD_BUILD_LIB=1 20 | LINK 21 | Boost::chrono 22 | ) 23 | if(NOT USE_WINDOWS) 24 | find_package(Threads REQUIRED) 25 | 26 | target_link_libraries(Boost_thread PUBLIC Threads::Threads) 27 | target_compile_definitions(Boost_thread PRIVATE 28 | BOOST_THREAD_POSIX 29 | ) 30 | endif() 31 | -------------------------------------------------------------------------------- /examples/outlier/run-analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for bamlet in bamlets/*.bam 4 | do 5 | sample=$(basename $bamlet) 6 | sample=${sample%.bam} 7 | 8 | ../../build/ExpansionHunterDenovo profile \ 9 | --reads $bamlet \ 10 | --reference reference.fasta \ 11 | --output-prefix str-profiles/${sample} 12 | done 13 | 14 | ../../build/ExpansionHunterDenovo merge \ 15 | --reference reference.fasta \ 16 | --manifest manifest.tsv \ 17 | --output-prefix example_dataset 18 | 19 | ../../scripts/outlier.py locus \ 20 | --manifest manifest.tsv \ 21 | --multisample-profile example_dataset.multisample_profile.json \ 22 | --output example_dataset.outlier_locus.tsv 23 | 24 | ../../scripts/outlier.py motif \ 25 | --manifest manifest.tsv \ 26 | 
--multisample-profile example_dataset.multisample_profile.json \ 27 | --output example_dataset.outlier_motif.tsv 28 | 29 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/wave.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME wave 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/wave/src/cpplexer/re2clex/aq.cpp 5 | ${BOOST_SOURCE}/libs/wave/src/cpplexer/re2clex/cpp_re.cpp 6 | ${BOOST_SOURCE}/libs/wave/src/instantiate_cpp_exprgrammar.cpp 7 | ${BOOST_SOURCE}/libs/wave/src/instantiate_cpp_grammar.cpp 8 | ${BOOST_SOURCE}/libs/wave/src/instantiate_cpp_literalgrs.cpp 9 | ${BOOST_SOURCE}/libs/wave/src/instantiate_defined_grammar.cpp 10 | ${BOOST_SOURCE}/libs/wave/src/instantiate_predef_macros.cpp 11 | ${BOOST_SOURCE}/libs/wave/src/instantiate_re2c_lexer.cpp 12 | ${BOOST_SOURCE}/libs/wave/src/instantiate_re2c_lexer_str.cpp 13 | ${BOOST_SOURCE}/libs/wave/src/token_ids.cpp 14 | ${BOOST_SOURCE}/libs/wave/src/wave_config_constant.cpp 15 | LINK 16 | Boost::date_time 17 | Boost::filesystem 18 | Boost::thread 19 | ) 20 | -------------------------------------------------------------------------------- /source/common/Interval.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 
11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #include "common/Interval.hh" 23 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/coroutine.cmake: -------------------------------------------------------------------------------- 1 | if(USE_WINDOWS) 2 | set(coroutine_srcs 3 | ${BOOST_SOURCE}/libs/coroutine/src/windows/stack_traits.cpp 4 | ) 5 | else() 6 | set(coroutine_srcs 7 | ${BOOST_SOURCE}/libs/coroutine/src/posix/stack_traits.cpp 8 | ) 9 | endif() 10 | 11 | _add_boost_lib( 12 | NAME coroutine 13 | SOURCES 14 | ${coroutine_srcs} 15 | ${BOOST_SOURCE}/libs/coroutine/src/detail/coroutine_context.cpp 16 | ${BOOST_SOURCE}/libs/coroutine/src/exceptions.cpp 17 | DEFINE_PRIVATE 18 | BOOST_COROUTINES_SOURCE 19 | LINK 20 | Boost::context 21 | Boost::thread 22 | ) 23 | 24 | _add_boost_test( 25 | NAME coroutine_test 26 | LINK 27 | Boost::coroutine 28 | Boost::unit_test_framework 29 | TESTS 30 | RUN ${BOOST_SOURCE}/libs/coroutine/test/test_asymmetric_coroutine.cpp 31 | RUN ${BOOST_SOURCE}/libs/coroutine/test/test_symmetric_coroutine.cpp 32 | ) 33 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog { 12 | namespace details { 13 | struct null_mutex 14 | { 15 | void lock() {} 16 | void unlock() {} 17 | bool try_lock() 18 | { 19 | return true; 20 | } 21 | }; 22 | 23 | struct null_atomic_int 24 | { 25 | int value; 26 | null_atomic_int() = default; 27 | 28 | explicit null_atomic_int(int val) 29 | : value(val) 30 | { 31 | } 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } // namespace details 45 | } // namespace spdlog 46 | -------------------------------------------------------------------------------- /source/tests/UnitTests.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #define CATCH_CONFIG_MAIN 23 | #include "thirdparty/catch2/catch.hpp" 24 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/context/jump_combined.S: -------------------------------------------------------------------------------- 1 | #if defined(__APPLE__) 2 | #if defined(__arm__) 3 | #include "libs/context/src/asm/jump_arm_aapcs_macho_gas.S" 4 | #elif defined(__arm64__) 5 | #include "libs/context/src/asm/jump_arm64_aapcs_macho_gas.S" 6 | #else 7 | // Other kinds of macOS or iOS Simulator 8 | #include "libs/context/src/asm/jump_combined_sysv_macho_gas.S" 9 | #endif 10 | #elif defined(__linux__) || defined(__FreeBSD__) 11 | #if defined(__arm__) 12 | #include "libs/context/src/asm/jump_arm_aapcs_elf_gas.S" 13 | #elif defined(__aarch64__) 14 | #include "libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" 15 | #elif defined(__i386__) 16 | #include "libs/context/src/asm/jump_i386_sysv_elf_gas.S" 17 | #elif defined(__x86_64__) 18 | #include "libs/context/src/asm/jump_x86_64_sysv_elf_gas.S" 19 | #else 20 | #error "Unknown platform" 21 | #endif 22 | #else 23 | #error "Unknown platform" 24 | #endif 25 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/context/make_combined.S: -------------------------------------------------------------------------------- 1 | #if defined(__APPLE__) 2 | #if defined(__arm__) 3 | #include "libs/context/src/asm/make_arm_aapcs_macho_gas.S" 4 | #elif defined(__arm64__) 5 | #include "libs/context/src/asm/make_arm64_aapcs_macho_gas.S" 6 | #else 7 | // Other kinds of macOS or iOS Simulator 8 | #include "libs/context/src/asm/make_combined_sysv_macho_gas.S" 9 | #endif 10 | #elif defined(__linux__) || defined(__FreeBSD__) 11 | #if defined(__arm__) 12 | #include "libs/context/src/asm/make_arm_aapcs_elf_gas.S" 13 | #elif defined(__aarch64__) 14 | #include "libs/context/src/asm/make_arm64_aapcs_elf_gas.S" 15 | 
#elif defined(__i386__) 16 | #include "libs/context/src/asm/make_i386_sysv_elf_gas.S" 17 | #elif defined(__x86_64__) 18 | #include "libs/context/src/asm/make_x86_64_sysv_elf_gas.S" 19 | #else 20 | #error "Unknown platform" 21 | #endif 22 | #else 23 | #error "Unknown platform" 24 | #endif 25 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/context/ontop_combined.S: -------------------------------------------------------------------------------- 1 | #if defined(__APPLE__) 2 | #if defined(__arm__) 3 | #include "libs/context/src/asm/ontop_arm_aapcs_macho_gas.S" 4 | #elif defined(__arm64__) 5 | #include "libs/context/src/asm/ontop_arm64_aapcs_macho_gas.S" 6 | #else 7 | // Other kinds of macOS or iOS Simulator 8 | #include "libs/context/src/asm/ontop_combined_sysv_macho_gas.S" 9 | #endif 10 | #elif defined(__linux__) || defined(__FreeBSD__) 11 | #if defined(__arm__) 12 | #include "libs/context/src/asm/ontop_arm_aapcs_elf_gas.S" 13 | #elif defined(__aarch64__) 14 | #include "libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S" 15 | #elif defined(__i386__) 16 | #include "libs/context/src/asm/ontop_i386_sysv_elf_gas.S" 17 | #elif defined(__x86_64__) 18 | #include "libs/context/src/asm/ontop_x86_64_sysv_elf_gas.S" 19 | #else 20 | #error "Unknown platform" 21 | #endif 22 | #else 23 | #error "Unknown platform" 24 | #endif 25 | -------------------------------------------------------------------------------- /source/app/Version.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 
11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include 25 | 26 | const std::string kProgramVersion = "ExpansionHunter Denovo v0.9.1"; 27 | -------------------------------------------------------------------------------- /source/merge/MergeWorkflow.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #pragma once 23 | 24 | #include "merge/MergeParameters.hh" 25 | 26 | int runMergeWorkflow(const MergeWorkflowParameters& parameters); 27 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/regex.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME regex 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/regex/src/c_regex_traits.cpp 5 | ${BOOST_SOURCE}/libs/regex/src/cpp_regex_traits.cpp 6 | ${BOOST_SOURCE}/libs/regex/src/cregex.cpp 7 | ${BOOST_SOURCE}/libs/regex/src/fileiter.cpp 8 | ${BOOST_SOURCE}/libs/regex/src/icu.cpp 9 | ${BOOST_SOURCE}/libs/regex/src/instances.cpp 10 | ${BOOST_SOURCE}/libs/regex/src/posix_api.cpp 11 | ${BOOST_SOURCE}/libs/regex/src/regex.cpp 12 | ${BOOST_SOURCE}/libs/regex/src/regex_debug.cpp 13 | ${BOOST_SOURCE}/libs/regex/src/regex_raw_buffer.cpp 14 | ${BOOST_SOURCE}/libs/regex/src/regex_traits_defaults.cpp 15 | ${BOOST_SOURCE}/libs/regex/src/static_mutex.cpp 16 | ${BOOST_SOURCE}/libs/regex/src/w32_regex_traits.cpp 17 | ${BOOST_SOURCE}/libs/regex/src/wc_regex_traits.cpp 18 | ${BOOST_SOURCE}/libs/regex/src/wide_posix_api.cpp 19 | ${BOOST_SOURCE}/libs/regex/src/winstances.cpp 20 | ${BOOST_SOURCE}/libs/regex/src/usinstances.cpp 21 | ) 22 | -------------------------------------------------------------------------------- /source/profile/ProfileWorkflow.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 
11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include "profile/ProfileParameters.hh" 25 | 26 | int runProfileWorkflow(const ProfileWorkflowParameters& parameters); 27 | -------------------------------------------------------------------------------- /source/app/GatherParameters.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #pragma once 23 | 24 | #include 25 | 26 | #include "common/Parameters.hh" 27 | 28 | boost::optional loadParameters(int argc, char** argv); 29 | -------------------------------------------------------------------------------- /source/common/SequenceUtils.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #pragma once 23 | 24 | #include 25 | 26 | // The standard reverse complement operation; non-core nucleotide bases are converted to Ns 27 | std::string reverseComplement(const std::string& sequence); 28 | -------------------------------------------------------------------------------- /examples/case-control/run-analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # Generate STR profiles for each sample 5 | for bamlet in bamlets/*.bam 6 | do 7 | sample=$(basename $bamlet) 8 | sample=${sample%.bam} 9 | 10 | ../../build/ExpansionHunterDenovo profile \ 11 | --reads $bamlet \ 12 | --reference reference.fasta \ 13 | --output-prefix str-profiles/${sample} 14 | done 15 | 16 | # Merge STR profiles into multi-sample STR profile 17 | ../../build/ExpansionHunterDenovo merge \ 18 | --reference reference.fasta \ 19 | --manifest manifest.tsv \ 20 | --output-prefix example_dataset 21 | 22 | 23 | # Perform locus-based case-control comparison 24 | ../../scripts/casecontrol.py locus \ 25 | --manifest manifest.tsv \ 26 | --multisample-profile example_dataset.multisample_profile.json \ 27 | --output example_dataset.casecontrol_locus.tsv 28 | 29 | 30 | # Perform motif-based case-control comparison 31 | ../../scripts/casecontrol.py motif \ 32 | --manifest manifest.tsv \ 33 | --multisample-profile example_dataset.multisample_profile.json \ 34 | --output example_dataset.casecontrol_motif.tsv 35 | 36 | -------------------------------------------------------------------------------- /documentation/02_Installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ExpansionHunter Denovo is designed for Linux and macOS operating systems. 4 | Compiled binaries can be downloaded from the [releases 5 | page](https://github.com/Illumina/ExpansionHunterDenovo/releases). 6 | Alternatively, the program can be built from source following the 7 | instructions below. 
8 | 9 | ## Building from source 10 | 11 | Prerequisites: 12 | 13 | - GCC or clang compiler supporting C++11 standard 14 | - CMake version 3.10 or above 15 | - Libraries zlib, bzip2, liblzma along with their development files; these 16 | can be installed on Ubuntu Linux like so: 17 | 18 | ```bash 19 | sudo apt install zlib1g-dev libbz2-dev liblzma-dev 20 | ``` 21 | 22 | - Active internet connection (to automatically download Boost libraries) 23 | 24 | Once the above prerequisites are satisfied the program can be built as follows: 25 | 26 | ```bash 27 | cd ExpansionHunterDenovo/ 28 | mkdir build && cd build 29 | cmake -DCMAKE_BUILD_TYPE=Release ../source 30 | make 31 | ``` 32 | 33 | If the build procedure succeeds, the `build` directory will contain the 34 | `ExpansionHunterDenovo` binary file. 35 | -------------------------------------------------------------------------------- /source/reads/unit_tests/pair_collector_test.cc: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | // Code to detect sequences (in-repeat reads) that consist of repetitions of some short string (repeat unit). 
For 23 | // example, CGGCGGCGGCGG is an in-repeat read with repeat unit CGG. 24 | 25 | #include "profile/PairCollector.hh" 26 | 27 | #include "gmock/gmock.h" 28 | 29 | TEST(ReadCollector, CollectsReads) {} 30 | -------------------------------------------------------------------------------- /documentation/00_Introduction.md: -------------------------------------------------------------------------------- 1 | # A guide to ExpansionHunter Denovo software 2 | 3 | ExpansionHunter Denovo (EHdn) is a suite of tools for detecting novel expansions 4 | of short tandem repeats (STRs). EHdn is intended for analysis of a collection of 5 | BAM/CRAM files containing alignments of short (100-200bp) reads. 6 | 7 | To use ExpansionHunter Denovo you need to (1) download the latest release or build 8 | the program from source, and (2) obtain a collection of BAM/CRAM files, a subset of which 9 | is suspected to contain a repeat expansion. 10 | 11 | The following sections will help you to get started. 12 | 13 | Quick start guides: 14 | 15 | - [Before you begin](01_Before_you_begin.md) 16 | - [Installation](02_Installation.md) 17 | - [Case-control analysis](03_Case_control_quickstart.md) 18 | - [Outlier analysis](04_Outlier_quickstart.md) 19 | 20 | Manual pages: 21 | 22 | - [Computing STR profiles](05_Computing_profiles.md) 23 | - [Merging single-sample STR profiles into multisample profiles](06_Merging_profiles.md) 24 | - [Performing case-control analysis](07_Case_control_analysis.md) 25 | - [Annotating locus analysis results](08_Annotation.md) 26 | - [Repeat Prioritization Strategies](09_Repeat_prioritization_strategies.md) 27 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/repack.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | BOOST_VERSION=1.67.0 6 | 7 | function finish { 8 | rm -rf ${tmp_dir} 9 | } 10 | trap finish EXIT 11 | 12 | 
out_dir=$(pwd) 13 | patch_dir=$(pwd)/patch/${BOOST_VERSION} 14 | tmp_dir=$(mktemp -d) 15 | 16 | echo "Downloading Boost ${BOOST_VERSION}..." 17 | curl -L "https://boostorg.jfrog.io/artifactory/main/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//\./_}.tar.bz2" > ${tmp_dir}/boost_${BOOST_VERSION}.tar.bz2 18 | 19 | mkdir -p ${tmp_dir}/extract 20 | cd ${tmp_dir}/extract 21 | echo "Extracting archive..." 22 | tar xf ${tmp_dir}/boost_${BOOST_VERSION}.tar.bz2 23 | 24 | cd boost_* 25 | 26 | if [ -d "${patch_dir}" ]; then 27 | mkdir patch 28 | for f in ${patch_dir}/*.patch; do 29 | echo "Applying patch ${f}..." 30 | git apply --verbose $f 31 | cp $f patch/ 32 | done 33 | fi 34 | 35 | echo "Removing extra files..." 36 | find . -name "doc" -print0 | xargs -0 -- rm -rf 37 | find . -name "*.htm*" -delete 38 | find . -name "*.png" -delete 39 | find . -name "*.bmp" -delete 40 | find . -name "*.jpg" -delete 41 | 42 | cd .. 43 | echo "Recompressing archive..." 44 | tar cfJ ${out_dir}/boost_${BOOST_VERSION//./_}.tar.xz boost_* 45 | -------------------------------------------------------------------------------- /source/reads/Read.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 
19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #include "reads/Read.hh" 23 | 24 | std::ostream& operator<<(std::ostream& out, const Read& read) 25 | { 26 | out << read.name << "\t" << read.bases << "\t" << read.quals << "\t" << read.contigId << "\t" << read.pos << "\t" 27 | << read.mateContigId << "\t" << read.matePos << "\t" << read.mapq << "\t" << read.flag; 28 | return out; 29 | } 30 | -------------------------------------------------------------------------------- /source/io/HtsHelpers.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #pragma once 23 | 24 | extern "C" 25 | { 26 | #include "htslib/hts.h" 27 | #include "htslib/sam.h" 28 | } 29 | 30 | #include "reads/Read.hh" 31 | #include "region/ReferenceContigInfo.hh" 32 | 33 | bool isPrimaryAlignment(bam1_t* htsAlignPtr); 34 | Read decodeHtsRead(bam1_t* htsAlignPtr); 35 | ReferenceContigInfo decodeContigInfo(bam_hdr_t* htsHeaderPtr); 36 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/patch/1.63.0/context_0002_macOS_execution_context.patch: -------------------------------------------------------------------------------- 1 | From 3167d4dfb82aa74fcf41c755d6c9bc7a3401bfea Mon Sep 17 00:00:00 2001 2 | From: Timo Sandmann 3 | Date: Sun, 8 Jan 2017 18:24:20 +0100 4 | Subject: [PATCH] Fixes #38 5 | 6 | Use correct type cast and tuple extracting for pointer to transfered data tuples 7 | --- 8 | include/boost/context/execution_context_v2.hpp | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/boost/context/execution_context_v2.hpp b/boost/context/execution_context_v2.hpp 12 | index 33b9bda..bbd4eb1 100644 13 | --- a/boost/context/execution_context_v2.hpp 14 | +++ b/boost/context/execution_context_v2.hpp 15 | @@ -103,7 +103,7 @@ class record { 16 | 17 | transfer_t run( transfer_t t) { 18 | Ctx from{ t.fctx }; 19 | - typename Ctx::args_tpl_t args = std::move( * static_cast< typename Ctx::args_tpl_t * >( t.data) ); 20 | + typename Ctx::args_tpl_t args = std::move( std::get<1>( * static_cast< std::tuple< std::exception_ptr, typename Ctx::args_tpl_t > * >( t.data) ) ); 21 | auto tpl = std::tuple_cat( 22 | params_, 23 | std::forward_as_tuple( std::move( from) ), 24 | -------------------------------------------------------------------------------- /examples/outlier/manifest.tsv: -------------------------------------------------------------------------------- 1 | sample1 case str-profiles/sample1.str_profile.json 2 | sample2 control 
str-profiles/sample2.str_profile.json 3 | sample3 control str-profiles/sample3.str_profile.json 4 | sample4 control str-profiles/sample4.str_profile.json 5 | sample5 control str-profiles/sample5.str_profile.json 6 | sample6 control str-profiles/sample6.str_profile.json 7 | sample7 control str-profiles/sample7.str_profile.json 8 | sample8 control str-profiles/sample8.str_profile.json 9 | sample9 control str-profiles/sample9.str_profile.json 10 | sample10 control str-profiles/sample10.str_profile.json 11 | sample11 control str-profiles/sample11.str_profile.json 12 | sample12 control str-profiles/sample12.str_profile.json 13 | sample13 control str-profiles/sample13.str_profile.json 14 | sample14 control str-profiles/sample14.str_profile.json 15 | sample15 control str-profiles/sample15.str_profile.json 16 | sample16 control str-profiles/sample16.str_profile.json 17 | sample17 control str-profiles/sample17.str_profile.json 18 | sample18 control str-profiles/sample18.str_profile.json 19 | sample19 control str-profiles/sample19.str_profile.json 20 | sample20 control str-profiles/sample20.str_profile.json 21 | sample21 control str-profiles/sample21.str_profile.json 22 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/test.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME unit_test_framework 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/test/src/compiler_log_formatter.cpp 5 | ${BOOST_SOURCE}/libs/test/src/debug.cpp 6 | ${BOOST_SOURCE}/libs/test/src/decorator.cpp 7 | ${BOOST_SOURCE}/libs/test/src/execution_monitor.cpp 8 | ${BOOST_SOURCE}/libs/test/src/framework.cpp 9 | ${BOOST_SOURCE}/libs/test/src/junit_log_formatter.cpp 10 | ${BOOST_SOURCE}/libs/test/src/plain_report_formatter.cpp 11 | ${BOOST_SOURCE}/libs/test/src/progress_monitor.cpp 12 | ${BOOST_SOURCE}/libs/test/src/results_collector.cpp 13 | ${BOOST_SOURCE}/libs/test/src/results_reporter.cpp 
14 | ${BOOST_SOURCE}/libs/test/src/test_framework_init_observer.cpp 15 | ${BOOST_SOURCE}/libs/test/src/test_tools.cpp 16 | ${BOOST_SOURCE}/libs/test/src/test_tree.cpp 17 | ${BOOST_SOURCE}/libs/test/src/unit_test_log.cpp 18 | ${BOOST_SOURCE}/libs/test/src/unit_test_main.cpp 19 | ${BOOST_SOURCE}/libs/test/src/unit_test_monitor.cpp 20 | ${BOOST_SOURCE}/libs/test/src/unit_test_parameters.cpp 21 | ${BOOST_SOURCE}/libs/test/src/xml_log_formatter.cpp 22 | ${BOOST_SOURCE}/libs/test/src/xml_report_formatter.cpp 23 | LINK 24 | Boost::timer 25 | ) 26 | -------------------------------------------------------------------------------- /source/reads/Read.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | struct Read 28 | { 29 | std::string name; 30 | std::string bases; 31 | std::string quals; 32 | int contigId; 33 | size_t pos; 34 | int mateContigId; 35 | size_t matePos; 36 | size_t mapq; 37 | size_t flag; 38 | }; 39 | 40 | std::ostream& operator<<(std::ostream& out, const Read& read); 41 | -------------------------------------------------------------------------------- /source/profile/ReadClassification.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | #pragma once 22 | 23 | #include 24 | 25 | #include "common/Interval.hh" 26 | #include "profile/PairCollector.hh" 27 | 28 | ReadType 29 | classifyRead(Interval motifSizeRange, int max_irr_mapq, int min_anchor_mapq, const Read& read, std::string& unit); 30 | PairType 31 | classifyPair(ReadType read_type, const std::string& read_unit, ReadType mate_type, const std::string& mate_unit); 32 | -------------------------------------------------------------------------------- /source/tests/SequenceUtilsTest.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #include "common/SequenceUtils.hh" 23 | 24 | #include 25 | #include 26 | 27 | #include "thirdparty/catch2/catch.hpp" 28 | 29 | using std::string; 30 | using std::vector; 31 | 32 | TEST_CASE("Reverse complement can be computed for any sequence", "[reverse complementing]") 33 | { 34 | const string bases = "ATCGN"; 35 | REQUIRE("NCGAT" == reverseComplement(bases)); 36 | } 37 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #include "spdlog/spdlog.h" 10 | #endif 11 | 12 | #if defined(_WIN32) 13 | 14 | #include "spdlog/details/null_mutex.h" 15 | #include "spdlog/sinks/base_sink.h" 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | namespace spdlog { 23 | namespace sinks { 24 | /* 25 | * MSVC sink (logging using OutputDebugStringA) 26 | */ 27 | template 28 | class msvc_sink : public base_sink 29 | { 30 | public: 31 | explicit msvc_sink() {} 32 | 33 | protected: 34 | void sink_it_(const details::log_msg &msg) override 35 | { 36 | 37 | fmt::memory_buffer formatted; 38 | sink::formatter_->format(msg, formatted); 39 | OutputDebugStringA(fmt::to_string(formatted).c_str()); 40 | } 41 | 42 | void flush_() override {} 43 | }; 44 | 45 | using msvc_sink_mt = msvc_sink; 46 | using msvc_sink_st = msvc_sink; 47 | 48 | using windebug_sink_mt = msvc_sink_mt; 49 | using windebug_sink_st = msvc_sink_st; 50 | 51 | } // namespace sinks 52 | } // namespace spdlog 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/math.cmake: -------------------------------------------------------------------------------- 1 | set(math_c99 2 | acosh 3 | asinh 4 | 
atanh 5 | cbrt 6 | copysign 7 | erfc 8 | erf 9 | expm1 10 | fmax 11 | fmin 12 | fpclassify 13 | hypot 14 | lgamma 15 | llround 16 | log1p 17 | lround 18 | nextafter 19 | nexttoward 20 | round 21 | tgamma 22 | trunc 23 | ) 24 | 25 | set(math_tr1 26 | assoc_laguerre 27 | assoc_legendre 28 | beta 29 | comp_ellint_1 30 | comp_ellint_2 31 | comp_ellint_3 32 | cyl_bessel_i 33 | cyl_bessel_j 34 | cyl_bessel_k 35 | cyl_neumann 36 | ellint_1 37 | ellint_2 38 | ellint_3 39 | expint 40 | hermite 41 | laguerre 42 | legendre 43 | riemann_zeta 44 | sph_bessel 45 | sph_legendre 46 | sph_neumann 47 | ) 48 | 49 | foreach(c99src ${math_c99}) 50 | list(APPEND math_srcs 51 | ${BOOST_SOURCE}/libs/math/src/tr1/${c99src}.cpp 52 | ${BOOST_SOURCE}/libs/math/src/tr1/${c99src}f.cpp 53 | ) 54 | endforeach() 55 | 56 | foreach(tr1src ${math_tr1}) 57 | list(APPEND math_srcs 58 | ${BOOST_SOURCE}/libs/math/src/tr1/${tr1src}.cpp 59 | ${BOOST_SOURCE}/libs/math/src/tr1/${tr1src}f.cpp 60 | ) 61 | endforeach() 62 | 63 | _add_boost_lib( 64 | NAME math 65 | SOURCES 66 | ${math_srcs} 67 | ) 68 | target_include_directories(Boost_math PRIVATE ${BOOST_SOURCE}/libs/math/src/tr1) 69 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/mpi.cmake: -------------------------------------------------------------------------------- 1 | find_package(MPI QUIET) 2 | 3 | if(NOT MPI_CXX_FOUND) 4 | return() 5 | endif() 6 | 7 | _add_boost_lib( 8 | NAME mpi 9 | SOURCES 10 | ${BOOST_SOURCE}/libs/mpi/src/content_oarchive.cpp 11 | ${BOOST_SOURCE}/libs/mpi/src/environment.cpp 12 | ${BOOST_SOURCE}/libs/mpi/src/exception.cpp 13 | ${BOOST_SOURCE}/libs/mpi/src/graph_communicator.cpp 14 | ${BOOST_SOURCE}/libs/mpi/src/group.cpp 15 | ${BOOST_SOURCE}/libs/mpi/src/intercommunicator.cpp 16 | ${BOOST_SOURCE}/libs/mpi/src/mpi_datatype_cache.cpp 17 | ${BOOST_SOURCE}/libs/mpi/src/mpi_datatype_oarchive.cpp 18 | ${BOOST_SOURCE}/libs/mpi/src/packed_iarchive.cpp 19 | 
${BOOST_SOURCE}/libs/mpi/src/packed_oarchive.cpp 20 | ${BOOST_SOURCE}/libs/mpi/src/packed_skeleton_iarchive.cpp 21 | ${BOOST_SOURCE}/libs/mpi/src/packed_skeleton_oarchive.cpp 22 | ${BOOST_SOURCE}/libs/mpi/src/point_to_point.cpp 23 | ${BOOST_SOURCE}/libs/mpi/src/request.cpp 24 | ${BOOST_SOURCE}/libs/mpi/src/text_skeleton_oarchive.cpp 25 | ${BOOST_SOURCE}/libs/mpi/src/timer.cpp 26 | DEFINE_PRIVATE 27 | BOOST_MPI_SOURCE=1 28 | LINK 29 | Boost::serialization 30 | ) 31 | target_include_directories(Boost_mpi PUBLIC ${MPI_CXX_INCLUDE_PATH}) 32 | target_link_libraries(Boost_mpi PUBLIC ${MPI_CXX_LINK_FLAGS} ${MPI_CXX_LIBRARIES}) 33 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #include "spdlog/spdlog.h" 10 | #endif 11 | 12 | #include "spdlog/details/null_mutex.h" 13 | #include "spdlog/sinks/base_sink.h" 14 | 15 | #include 16 | 17 | namespace spdlog { 18 | namespace sinks { 19 | 20 | template 21 | class null_sink : public base_sink 22 | { 23 | protected: 24 | void sink_it_(const details::log_msg &) override {} 25 | void flush_() override {} 26 | }; 27 | 28 | using null_sink_mt = null_sink; 29 | using null_sink_st = null_sink; 30 | 31 | } // namespace sinks 32 | 33 | template 34 | inline std::shared_ptr null_logger_mt(const std::string &logger_name) 35 | { 36 | auto null_logger = Factory::template create(logger_name); 37 | null_logger->set_level(level::off); 38 | return null_logger; 39 | } 40 | 41 | template 42 | inline std::shared_ptr null_logger_st(const std::string &logger_name) 43 | { 44 | auto null_logger = Factory::template create(logger_name); 45 | null_logger->set_level(level::off); 46 | return null_logger; 47 | } 48 | 49 | } // 
namespace spdlog 50 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/fmt/bundled/LICENSE.rst: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 - 2016, Victor Zverovich 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/details/log_msg.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/common.h" 9 | #include "spdlog/details/os.h" 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog { 15 | namespace details { 16 | struct log_msg 17 | { 18 | 19 | log_msg(source_loc loc, const std::string *loggers_name, level::level_enum lvl, string_view_t view) 20 | : logger_name(loggers_name) 21 | , level(lvl) 22 | #ifndef SPDLOG_NO_DATETIME 23 | , time(os::now()) 24 | #endif 25 | 26 | #ifndef SPDLOG_NO_THREAD_ID 27 | , thread_id(os::thread_id()) 28 | #endif 29 | , source(loc) 30 | , payload(view) 31 | { 32 | } 33 | 34 | log_msg(const std::string *loggers_name, level::level_enum lvl, string_view_t view) 35 | : log_msg(source_loc{}, loggers_name, lvl, view) 36 | { 37 | } 38 | 39 | log_msg(const log_msg &other) = default; 40 | 41 | const std::string *logger_name{nullptr}; 42 | level::level_enum level{level::off}; 43 | log_clock::time_point time; 44 | size_t thread_id{0}; 45 | size_t msg_id{0}; 46 | 47 | // wrapping the formatted text with color (updated by pattern_formatter). 48 | mutable size_t color_range_start{0}; 49 | mutable size_t color_range_end{0}; 50 | 51 | source_loc source; 52 | const string_view_t payload; 53 | }; 54 | } // namespace details 55 | } // namespace spdlog 56 | -------------------------------------------------------------------------------- /source/io/Reference.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 
11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include "htslib/faidx.h" 25 | 26 | #include "region/GenomicRegion.hh" 27 | #include "region/ReferenceContigInfo.hh" 28 | 29 | class Reference 30 | { 31 | public: 32 | explicit Reference(const std::string& referencePath); 33 | ~Reference(); 34 | 35 | std::string getSequence(const std::string& contigIndex, int64_t start, int64_t end) const; 36 | std::string getSequence(const GenomicRegion& region) const; 37 | 38 | const ReferenceContigInfo& contigInfo() const { return contigInfo_; } 39 | 40 | private: 41 | std::string referencePath_; 42 | faidx_t* htsFastaIndexPtr_; 43 | ReferenceContigInfo contigInfo_; 44 | }; 45 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/LICENSE.md: -------------------------------------------------------------------------------- 1 | New BSD License 2 | =============== 3 | 4 | Copyright (c) 2016, Florent Castelli 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
11 | * Neither the name of the Phalcon nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PHALCON FRAMEWORK TEAM BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #include "spdlog/spdlog.h" 10 | #endif 11 | 12 | #include "spdlog/details/null_mutex.h" 13 | #include "spdlog/sinks/base_sink.h" 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog { 19 | namespace sinks { 20 | template 21 | class ostream_sink final : public base_sink 22 | { 23 | public: 24 | explicit ostream_sink(std::ostream &os, bool force_flush = false) 25 | : ostream_(os) 26 | , force_flush_(force_flush) 27 | { 28 | } 29 | ostream_sink(const ostream_sink &) = delete; 30 | ostream_sink &operator=(const ostream_sink &) = delete; 31 | 32 | protected: 33 | void sink_it_(const details::log_msg &msg) override 34 | { 35 | fmt::memory_buffer formatted; 36 | sink::formatter_->format(msg, formatted); 37 | ostream_.write(formatted.data(), static_cast(formatted.size())); 38 | if (force_flush_) 39 | { 40 | ostream_.flush(); 41 | } 42 | } 43 | 44 | void flush_() override 45 | { 46 | ostream_.flush(); 47 | } 48 | 49 | std::ostream &ostream_; 50 | bool force_flush_; 51 | }; 52 | 53 | using ostream_sink_mt = ostream_sink; 54 | using ostream_sink_st = ostream_sink; 55 | 56 | } // namespace sinks 57 | } // namespace spdlog 58 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/details/console_globals.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // 3 | // Copyright(c) 2018 Gabi Melman. 
4 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 5 | // 6 | 7 | #include "spdlog/details/null_mutex.h" 8 | #include 9 | #include 10 | 11 | #ifdef _WIN32 12 | 13 | #ifndef NOMINMAX 14 | #define NOMINMAX // prevent windows redefining min/max 15 | #endif 16 | 17 | #ifndef WIN32_LEAN_AND_MEAN 18 | #define WIN32_LEAN_AND_MEAN 19 | #endif 20 | 21 | #include 22 | #endif 23 | 24 | namespace spdlog { 25 | namespace details { 26 | struct console_stdout 27 | { 28 | static std::FILE *stream() 29 | { 30 | return stdout; 31 | } 32 | #ifdef _WIN32 33 | static HANDLE handle() 34 | { 35 | return ::GetStdHandle(STD_OUTPUT_HANDLE); 36 | } 37 | #endif 38 | }; 39 | 40 | struct console_stderr 41 | { 42 | static std::FILE *stream() 43 | { 44 | return stderr; 45 | } 46 | #ifdef _WIN32 47 | static HANDLE handle() 48 | { 49 | return ::GetStdHandle(STD_ERROR_HANDLE); 50 | } 51 | #endif 52 | }; 53 | 54 | struct console_mutex 55 | { 56 | using mutex_t = std::mutex; 57 | static mutex_t &mutex() 58 | { 59 | static mutex_t s_mutex; 60 | return s_mutex; 61 | } 62 | }; 63 | 64 | struct console_nullmutex 65 | { 66 | using mutex_t = null_mutex; 67 | static mutex_t &mutex() 68 | { 69 | static mutex_t s_mutex; 70 | return s_mutex; 71 | } 72 | }; 73 | } // namespace details 74 | } // namespace spdlog 75 | -------------------------------------------------------------------------------- /source/merge/MultisampleProfile.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 
11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | #include "region/GenomicRegion.hh" 28 | 29 | using SampleId = std::string; 30 | using Motif = std::string; 31 | 32 | using SampleToIrrPairCount = std::unordered_map; 33 | using MultisampleIrrPairProfile = std::unordered_map; 34 | using MultisampleAnchoredIrrProfile = std::unordered_map>; 35 | 36 | void normalize(MultisampleAnchoredIrrProfile& profile); 37 | void add( 38 | const SampleId& sampleId, const Motif& motif, const GenomicRegion& region, int numAnchoredIrrs, 39 | MultisampleAnchoredIrrProfile& profile); 40 | -------------------------------------------------------------------------------- /source/common/Interval.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 
19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | class Interval 28 | { 29 | public: 30 | Interval(int start, int end) 31 | : start_(start) 32 | , end_(end) 33 | { 34 | if (start_ > end_) 35 | { 36 | const auto interval = "(" + std::to_string(start_) + ", " + std::to_string(end_) + ")"; 37 | throw std::runtime_error("Invalid interval endpoints " + interval); 38 | } 39 | } 40 | 41 | int start() const { return start_; } 42 | int end() const { return end_; } 43 | bool contains(int value) const { return start_ <= value && value <= end_; } 44 | 45 | private: 46 | int start_; 47 | int end_; 48 | }; 49 | -------------------------------------------------------------------------------- /source/merge/MultisampleProfile.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #include "merge/MultisampleProfile.hh" 23 | 24 | using std::vector; 25 | 26 | void normalize(MultisampleAnchoredIrrProfile& profile) 27 | { 28 | for (auto& motifAndSampleCounts : profile) 29 | { 30 | sortAndMerge(motifAndSampleCounts.second); 31 | } 32 | } 33 | 34 | void add( 35 | const SampleId& sampleId, const Motif& motif, const GenomicRegion& region, int numAnchoredIrrs, 36 | MultisampleAnchoredIrrProfile& profile) 37 | { 38 | SampleCountFeature sampleCount({ { sampleId, numAnchoredIrrs } }); 39 | RegionWithSampleCount regionWithSampleCount(region.contigId(), region.start(), region.end(), sampleCount); 40 | profile[motif].push_back(regionWithSampleCount); 41 | } 42 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/AddBoostLib.cmake: -------------------------------------------------------------------------------- 1 | function(_add_boost_lib) 2 | set(options ) 3 | set(oneValueArgs NAME) 4 | set(multiValueArgs SOURCES LINK DEFINE DEFINE_PRIVATE CXXFLAGS_PRIVATE INCLUDE_PRIVATE) 5 | cmake_parse_arguments(BOOSTLIB "${options}" "${oneValueArgs}" 6 | "${multiValueArgs}" ${ARGN}) 7 | add_library(Boost_${BOOSTLIB_NAME} STATIC ${BOOSTLIB_SOURCES}) 8 | add_library(Boost::${BOOSTLIB_NAME} ALIAS Boost_${BOOSTLIB_NAME}) 9 | set_target_properties(Boost_${BOOSTLIB_NAME} PROPERTIES 10 | OUTPUT_NAME "boost_${BOOSTLIB_NAME}" 11 | FOLDER "Boost" 12 | ) 13 | if(NOT BOOST_STANDALONE) 14 | set_target_properties(Boost_${BOOSTLIB_NAME} PROPERTIES EXCLUDE_FROM_ALL 1) 15 | endif() 16 | target_link_libraries(Boost_${BOOSTLIB_NAME} PUBLIC Boost::boost) 17 | if(MSVC) 18 | target_compile_options(Boost_${BOOSTLIB_NAME} PRIVATE /W0) 19 | else() 20 | target_compile_options(Boost_${BOOSTLIB_NAME} PRIVATE -w) 21 | endif() 22 | if(BOOSTLIB_LINK) 23 | target_link_libraries(Boost_${BOOSTLIB_NAME} PUBLIC ${BOOSTLIB_LINK}) 24 | endif() 25 | if(BOOSTLIB_DEFINE) 26 | target_compile_definitions(Boost_${BOOSTLIB_NAME} 
PUBLIC ${BOOSTLIB_DEFINE}) 27 | endif() 28 | if(BOOSTLIB_DEFINE_PRIVATE) 29 | target_compile_definitions(Boost_${BOOSTLIB_NAME} PRIVATE ${BOOSTLIB_DEFINE_PRIVATE}) 30 | endif() 31 | if(BOOSTLIB_CXXFLAGS_PRIVATE) 32 | target_compile_options(Boost_${BOOSTLIB_NAME} PRIVATE ${BOOSTLIB_CXXFLAGS_PRIVATE}) 33 | endif() 34 | if(BOOSTLIB_INCLUDE_PRIVATE) 35 | target_include_directories(Boost_${BOOSTLIB_NAME} PRIVATE ${BOOSTLIB_INCLUDE_PRIVATE}) 36 | endif() 37 | endfunction() 38 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/program_options.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME program_options 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/program_options/src/cmdline.cpp 5 | ${BOOST_SOURCE}/libs/program_options/src/config_file.cpp 6 | ${BOOST_SOURCE}/libs/program_options/src/options_description.cpp 7 | ${BOOST_SOURCE}/libs/program_options/src/parsers.cpp 8 | ${BOOST_SOURCE}/libs/program_options/src/variables_map.cpp 9 | ${BOOST_SOURCE}/libs/program_options/src/value_semantic.cpp 10 | ${BOOST_SOURCE}/libs/program_options/src/positional_options.cpp 11 | ${BOOST_SOURCE}/libs/program_options/src/utf8_codecvt_facet.cpp 12 | ${BOOST_SOURCE}/libs/program_options/src/convert.cpp 13 | ${BOOST_SOURCE}/libs/program_options/src/winmain.cpp 14 | ${BOOST_SOURCE}/libs/program_options/src/split.cpp 15 | ) 16 | 17 | _add_boost_test( 18 | NAME program_options_test 19 | LINK 20 | Boost::program_options 21 | TESTS 22 | RUN ${BOOST_SOURCE}/libs/program_options/test/options_description_test.cpp 23 | RUN ${BOOST_SOURCE}/libs/program_options/test/variable_map_test.cpp 24 | RUN ${BOOST_SOURCE}/libs/program_options/test/cmdline_test.cpp 25 | RUN ${BOOST_SOURCE}/libs/program_options/test/positional_options_test.cpp 26 | RUN ${BOOST_SOURCE}/libs/program_options/test/unicode_test.cpp 27 | RUN ${BOOST_SOURCE}/libs/program_options/test/winmain.cpp 28 | RUN 
${BOOST_SOURCE}/libs/program_options/test/exception_test.cpp 29 | RUN ${BOOST_SOURCE}/libs/program_options/test/split_test.cpp 30 | RUN ${BOOST_SOURCE}/libs/program_options/test/unrecognized_test.cpp 31 | RUN ${BOOST_SOURCE}/libs/program_options/test/exception_txt_test.cpp 32 | ) 33 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/log_msg.h" 9 | #include "spdlog/details/pattern_formatter.h" 10 | #include "spdlog/formatter.h" 11 | 12 | namespace spdlog { 13 | namespace sinks { 14 | class sink 15 | { 16 | public: 17 | sink() 18 | : level_(level::trace) 19 | , formatter_(new pattern_formatter()) 20 | { 21 | } 22 | 23 | explicit sink(std::unique_ptr formatter) 24 | : level_(level::trace) 25 | , formatter_(std::move(formatter)) 26 | { 27 | } 28 | 29 | virtual ~sink() = default; 30 | virtual void log(const details::log_msg &msg) = 0; 31 | virtual void flush() = 0; 32 | virtual void set_pattern(const std::string &pattern) = 0; 33 | virtual void set_formatter(std::unique_ptr sink_formatter) = 0; 34 | 35 | bool should_log(level::level_enum msg_level) const 36 | { 37 | return msg_level >= level_.load(std::memory_order_relaxed); 38 | } 39 | 40 | void set_level(level::level_enum log_level) 41 | { 42 | level_.store(log_level); 43 | } 44 | 45 | level::level_enum level() const 46 | { 47 | return static_cast(level_.load(std::memory_order_relaxed)); 48 | } 49 | 50 | protected: 51 | // sink log level - default is all 52 | level_t level_; 53 | 54 | // sink formatter - default is full format 55 | std::unique_ptr formatter_; 56 | }; 57 | 58 | } // namespace sinks 59 | } // namespace spdlog 60 | 
-------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/filesystem.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME filesystem 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/filesystem/src/codecvt_error_category.cpp 5 | ${BOOST_SOURCE}/libs/filesystem/src/operations.cpp 6 | ${BOOST_SOURCE}/libs/filesystem/src/path.cpp 7 | ${BOOST_SOURCE}/libs/filesystem/src/path_traits.cpp 8 | ${BOOST_SOURCE}/libs/filesystem/src/portability.cpp 9 | ${BOOST_SOURCE}/libs/filesystem/src/unique_path.cpp 10 | ${BOOST_SOURCE}/libs/filesystem/src/utf8_codecvt_facet.cpp 11 | ${BOOST_SOURCE}/libs/filesystem/src/windows_file_codecvt.cpp 12 | DEFINE_PRIVATE 13 | BOOST_FILESYSTEM_STATIC_LINK=1 14 | ) 15 | 16 | _add_boost_test( 17 | NAME filesystem_test 18 | LINK 19 | Boost::filesystem 20 | TESTS 21 | RUN ${BOOST_SOURCE}/libs/filesystem/test/convenience_test.cpp 22 | RUN ${BOOST_SOURCE}/libs/filesystem/test/macro_default_test.cpp 23 | RUN ${BOOST_SOURCE}/libs/filesystem/test/odr1_test.cpp 24 | ${BOOST_SOURCE}/libs/filesystem/test/odr2_test.cpp 25 | RUN ${BOOST_SOURCE}/libs/filesystem/test/deprecated_test.cpp 26 | RUN ${BOOST_SOURCE}/libs/filesystem/test/fstream_test.cpp 27 | RUN ${BOOST_SOURCE}/libs/filesystem/test/large_file_support_test.cpp 28 | RUN ${BOOST_SOURCE}/libs/filesystem/test/locale_info.cpp 29 | #RUN ${BOOST_SOURCE}/libs/filesystem/test/operations_test.cpp 30 | RUN ${BOOST_SOURCE}/libs/filesystem/test/path_test.cpp 31 | RUN ${BOOST_SOURCE}/libs/filesystem/test/path_unit_test.cpp 32 | RUN ${BOOST_SOURCE}/libs/filesystem/test/relative_test.cpp 33 | RUN ${BOOST_SOURCE}/libs/filesystem/example/simple_ls.cpp 34 | RUN ${BOOST_SOURCE}/libs/filesystem/example/file_status.cpp 35 | ) 36 | -------------------------------------------------------------------------------- /source/common/SequenceUtils.cpp: 
-------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #include "SequenceUtils.hh" 23 | 24 | using std::string; 25 | // Returns the reverse complement of `bases`. Only uppercase A, C, G and T are complemented; every other character (including lowercase bases) becomes 'N'. 26 | string reverseComplement(const string& bases) 27 | { 28 | string bases_rc = bases; // copy only provides a buffer of the right length; every position is overwritten below 29 | string::reverse_iterator bases_rc_iter = bases_rc.rbegin(); 30 | // The input is read front-to-back while the output is written back-to-front, which performs the reversal. 31 | char complemented_base = ' '; 32 | for (char base : bases) 33 | { 34 | switch (base) 35 | { 36 | case 'A': 37 | complemented_base = 'T'; 38 | break; 39 | case 'C': 40 | complemented_base = 'G'; 41 | break; 42 | case 'G': 43 | complemented_base = 'C'; 44 | break; 45 | case 'T': 46 | complemented_base = 'A'; 47 | break; 48 | default: 49 | complemented_base = 'N'; 50 | } 51 | *bases_rc_iter++ = complemented_base; 52 | } 53 | 54 | return bases_rc; 55 | } 56 | -------------------------------------------------------------------------------- /documentation/07_Case_control_analysis.md: -------------------------------------------------------------------------------- 1 | # Case-control analysis 2 | 3 | The case-control analysis is performed by a Python3 script `casecontrol.py` 4 | located inside `scripts` directory.
The locus-based analysis can be run 5 | like so: 6 | 7 | ```bash 8 | python3 casecontrol.py locus \ 9 | --manifest manifest.txt \ 10 | --multisample-profile multisample_profile.json \ 11 | --output-prefix output 12 | ``` 13 | 14 | The command to run the motif-based analysis is nearly identical: 15 | 16 | ```bash 17 | python3 casecontrol.py motif \ 18 | --manifest manifest.txt \ 19 | --multisample-profile multisample_profile.json \ 20 | --output-prefix output 21 | ``` 22 | 23 | The input parameters manifest.txt and multisample_profile.json are as 24 | [described previously](04_Merging_profiles.md). 25 | 26 | | Optional parameter | Description | Default | 27 | |--------------------|--------------------------------------------------------------|:-------:| 28 | | --min-count | Minimum number of reads in a region for downstream analysis | 5 | 29 | | --target-regions | BED file with regions to which analysis should be restricted | NA | 30 | | --test-method | Method of calculating Wilcoxon Rank-Sum Test p-value* | normal | 31 | 32 | \* The default value `normal` invokes the Normal approximation appropriate for 33 | larger samples. To compute the p-value directly for smaller samples, use 34 | `permute_<N>` where N is the number of permutations. For example, 35 | `permute_1000000` invokes a test with 1000000 permutations. 36 | 37 | ## Outputs 38 | 39 | The program produces two output files. One of them summarizes per-locus 40 | comparison of in-repeat reads. The other file summarizes the overall genome-wide 41 | comparison of motifs. 42 | -------------------------------------------------------------------------------- /source/region/ReferenceContigInfo.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved.
5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | // Handles translation between contig names and indexes 32 | class ReferenceContigInfo 33 | { 34 | public: 35 | explicit ReferenceContigInfo(std::vector> namesAndSizes); 36 | 37 | int numContigs() const { return namesAndSizes_.size(); } 38 | const std::string& getContigName(int contigIndex) const; 39 | int64_t getContigSize(int contigIndex) const; 40 | int getContigId(const std::string& contigName) const; 41 | 42 | private: 43 | void assertValidIndex(int contigIndex) const; 44 | 45 | std::vector> namesAndSizes_; 46 | std::unordered_map nameToIndex_; 47 | }; 48 | 49 | std::ostream& operator<<(std::ostream& out, const ReferenceContigInfo& contigInfo); 50 | -------------------------------------------------------------------------------- /documentation/01_Before_you_begin.md: -------------------------------------------------------------------------------- 1 | # Before you begin 2 | 3 | ExpansionHunter Denovo (EHdn) can be used to analyze a collection of BAM/CRAM 4 | files containing alignments of short (100-200bp) reads. For best results, the 5 | samples should be sequenced on the same instrument to similar coverage of at 6 | least 30x. 
All data should be aligned with the **same short-read aligner**, 7 | ideally without any post-processing steps such as indel realignment or 8 | recalibration. 9 | 10 | Your dataset should contain one or more samples that are suspected to harbor a 11 | repeat expansion and a set of controls. If the controls are not available, 12 | consider using the 13 | [Illumina Polaris](https://github.com/Illumina/Polaris/wiki/HiSeqX-Diversity-Cohort) 14 | dataset. 15 | 16 | ## Choosing between case/control and outlier analysis 17 | 18 | EHdn supports two secondary analysis types: case/control analysis and outlier 19 | analysis. Case/control analysis is appropriate for detecting expansions present 20 | in a significant proportion of cases and largely absent from controls. Outlier 21 | analysis, on the other hand, should be used when an expansion might be present 22 | only in one or a few case samples. 23 | 24 | ![AnalysisTypes](./images/analysis-types.png) 25 | 26 | **Example 1** If the set of cases consists of many related individuals 27 | exhibiting the same phenotype, then a **case/control** analysis may be 28 | appropriate. 29 | 30 | **Example 2** If the set of cases consists of ALS patients, the **case/control** 31 | analysis is expected to flag the GGCCCC repeat in the *C9orf72* gene as highly 32 | significant. 33 | 34 | **Example 3** If the set of cases is an undiagnosed rare disease cohort 35 | exhibiting a mix of phenotypes, it is unlikely that there is an enrichment for 36 | any specific expansion and hence the **outlier** analysis is appropriate.
37 | -------------------------------------------------------------------------------- /source/io/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Threads) 2 | 3 | include(ExternalProject) 4 | include(FindZLIB) 5 | include(FindBZip2) 6 | include(FindLibLZMA) 7 | 8 | #ExternalProject_Add(htslib 9 | # PREFIX ${CMAKE_BINARY_DIR}/thirdparty/htslib 10 | # GIT_REPOSITORY "https://github.com/samtools/htslib.git" 11 | # GIT_TAG "1.3.1" 12 | # UPDATE_COMMAND "" 13 | # BUILD_IN_SOURCE 1 14 | # CONFIGURE_COMMAND "./configure" 15 | # BUILD_COMMAND make 16 | # INSTALL_COMMAND make install prefix=${CMAKE_BINARY_DIR}/thirdparty/htslib 17 | # LOG_DOWNLOAD 1 18 | # ) 19 | 20 | 21 | ExternalProject_Add(htslib_project 22 | URL ${CMAKE_SOURCE_DIR}/thirdparty/htslib-1.9.tar.bz2 23 | SOURCE_DIR ${CMAKE_BINARY_DIR}/thirdparty/htslib-1.9-source 24 | INSTALL_DIR ${CMAKE_BINARY_DIR}/thirdparty/htslib-1.9 25 | CONFIGURE_COMMAND "" 26 | BUILD_COMMAND make -C prefix= 27 | INSTALL_COMMAND make -C install prefix= 28 | ) 29 | 30 | #set(htslib_static ${CMAKE_BINARY_DIR}/thirdparty/htslib/lib/libhts.a) 31 | add_library(htslib STATIC IMPORTED) 32 | add_dependencies(htslib htslib_project) 33 | set_property(TARGET htslib 34 | PROPERTY IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/thirdparty/htslib-1.9/lib/libhts.a) 35 | 36 | 37 | add_library(io STATIC 38 | HtsFileStreamer.hh HtsFileStreamer.cpp 39 | HtsHelpers.hh HtsHelpers.cpp 40 | Reference.hh Reference.cpp) 41 | 42 | target_include_directories(io PUBLIC 43 | ${CMAKE_SOURCE_DIR} 44 | ${CMAKE_BINARY_DIR}/thirdparty/htslib-1.9/include 45 | ) 46 | 47 | target_link_libraries(io PUBLIC 48 | reads 49 | htslib 50 | ${ZLIB_LIBRARIES} 51 | ${LIBLZMA_LIBRARIES} 52 | ${BZIP2_LIBRARIES} 53 | ${CMAKE_THREAD_LIBS_INIT} 54 | ) 55 | 56 | add_dependencies(io htslib) 57 | 58 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/details/circular_q.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2018 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | // cirucal q view of std::vector. 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace spdlog { 12 | namespace details { 13 | template 14 | class circular_q 15 | { 16 | public: 17 | using item_type = T; 18 | 19 | explicit circular_q(size_t max_items) 20 | : max_items_(max_items + 1) // one item is reserved as marker for full q 21 | , v_(max_items_) 22 | { 23 | } 24 | 25 | // push back, overrun (oldest) item if no room left 26 | void push_back(T &&item) 27 | { 28 | v_[tail_] = std::move(item); 29 | tail_ = (tail_ + 1) % max_items_; 30 | 31 | if (tail_ == head_) // overrun last item if full 32 | { 33 | head_ = (head_ + 1) % max_items_; 34 | ++overrun_counter_; 35 | } 36 | } 37 | 38 | // Pop item from front. 39 | // If there are no elements in the container, the behavior is undefined. 
40 | void pop_front(T &popped_item) 41 | { 42 | popped_item = std::move(v_[head_]); 43 | head_ = (head_ + 1) % max_items_; 44 | } 45 | 46 | bool empty() 47 | { 48 | return tail_ == head_; 49 | } 50 | 51 | bool full() 52 | { 53 | // head is ahead of the tail by 1 54 | return ((tail_ + 1) % max_items_) == head_; 55 | } 56 | 57 | size_t overrun_counter() const 58 | { 59 | return overrun_counter_; 60 | } 61 | 62 | private: 63 | size_t max_items_; 64 | typename std::vector::size_type head_ = 0; 65 | typename std::vector::size_type tail_ = 0; 66 | 67 | std::vector v_; 68 | 69 | size_t overrun_counter_ = 0; 70 | }; 71 | } // namespace details 72 | } // namespace spdlog 73 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Starter pipeline 2 | # Start with a minimal pipeline that you can customize to build and deploy your code. 
3 | # Add steps that build, run tests, deploy, and more: 4 | # https://aka.ms/yaml 5 | 6 | strategy: 7 | matrix: 8 | Linux-GCC: 9 | imageName: 'ubuntu-16.04' 10 | BUILD_TARGET: 'Linux' 11 | BUILD_COMPILER: 'GCC' 12 | Linux-Clang: 13 | imageName: 'ubuntu-16.04' 14 | BUILD_TARGET: 'Linux' 15 | BUILD_COMPILER: 'Clang' 16 | Android-arm64: 17 | imageName: 'ubuntu-16.04' 18 | BUILD_TARGET: 'Android' 19 | BUILD_ARCH: 'arm64-v8a' 20 | Android-arm: 21 | imageName: 'ubuntu-16.04' 22 | BUILD_TARGET: 'Android' 23 | BUILD_ARCH: 'armeabi-v7a' 24 | Android-x86: 25 | imageName: 'ubuntu-16.04' 26 | BUILD_TARGET: 'Android' 27 | BUILD_ARCH: 'x86' 28 | Android-x86_64: 29 | imageName: 'ubuntu-16.04' 30 | BUILD_TARGET: 'Android' 31 | BUILD_ARCH: 'x86_64' 32 | macOS: 33 | imageName: 'macos-latest' 34 | BUILD_TARGET: 'macOS' 35 | iOS: 36 | imageName: 'macos-latest' 37 | BUILD_TARGET: 'iOS' 38 | Windows-v141-amd64: 39 | imageName: 'vs2017-win2016' 40 | BUILD_TARGET: 'Windows' 41 | BUILD_TOOLCHAIN: 'amd64' 42 | Windows-v141-x86: 43 | imageName: 'vs2017-win2016' 44 | BUILD_TARGET: 'Windows' 45 | BUILD_TOOLCHAIN: 'amd64_x86' 46 | 47 | trigger: 48 | - master 49 | 50 | pool: 51 | vmImage: $(imageName) 52 | 53 | steps: 54 | - bash: | 55 | export 56 | displayName: 'Check environment' 57 | 58 | - bash: | 59 | bash build.sh 60 | displayName: 'Build' 61 | env: 62 | BUILD_TARGET: $(BUILD_TARGET) 63 | BUILD_COMPILER: $(BUILD_COMPILER) 64 | BUILD_ARCH: $(BUILD_ARCH) 65 | BUILD_TOOLCHAIN: $(BUILD_TOOLCHAIN) 66 | CCACHE_DISABLE: 1 67 | -------------------------------------------------------------------------------- /source/merge/MergeParameters.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 
5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | // Holds the inputs of the merge workflow: three file paths and the shortest/longest repeat-unit bounds, exposed through const accessors only. 27 | class MergeWorkflowParameters 28 | { 29 | public: 30 | MergeWorkflowParameters( 31 | const std::string& pathToReference, const std::string& outputPrefix, std::string pathToManifest, 32 | int shortestUnitToConsider, int longestUnitToConsider); 33 | // Read-only accessors. NOTE(review): there is no outputPrefix member; pathToMultisampleProfile_ is presumably derived from outputPrefix in the constructor (defined elsewhere) - confirm. 34 | const std::string& pathToReference() const { return pathToReference_; } 35 | const std::string& pathToMultisampleProfile() const { return pathToMultisampleProfile_; } 36 | const std::string& pathToManifest() const { return pathToManifest_; } 37 | int shortestUnitToConsider() const { return shortestUnitToConsider_; } 38 | int longestUnitToConsider() const { return longestUnitToConsider_; } 39 | 40 | private: 41 | std::string pathToReference_; 42 | std::string pathToMultisampleProfile_; 43 | std::string pathToManifest_; 44 | int shortestUnitToConsider_; 45 | int longestUnitToConsider_; 46 | }; 47 | // Validity check for a parameter set; defined outside this header, so the exact checks and failure behaviour are not visible here. 48 | void assertValidity(const MergeWorkflowParameters& parameters); 49 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/stdout_color_sinks.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2018 spdlog 3 | // Distributed under the MIT
License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #include "spdlog/spdlog.h" 10 | #endif 11 | 12 | #ifdef _WIN32 13 | #include "spdlog/sinks/wincolor_sink.h" 14 | #else 15 | #include "spdlog/sinks/ansicolor_sink.h" 16 | #endif 17 | 18 | namespace spdlog { 19 | namespace sinks { 20 | #ifdef _WIN32 21 | using stdout_color_sink_mt = wincolor_stdout_sink_mt; 22 | using stdout_color_sink_st = wincolor_stdout_sink_st; 23 | using stderr_color_sink_mt = wincolor_stderr_sink_mt; 24 | using stderr_color_sink_st = wincolor_stderr_sink_st; 25 | #else 26 | using stdout_color_sink_mt = ansicolor_stdout_sink_mt; 27 | using stdout_color_sink_st = ansicolor_stdout_sink_st; 28 | using stderr_color_sink_mt = ansicolor_stderr_sink_mt; 29 | using stderr_color_sink_st = ansicolor_stderr_sink_st; 30 | #endif 31 | } // namespace sinks 32 | 33 | template 34 | inline std::shared_ptr stdout_color_mt(const std::string &logger_name) 35 | { 36 | return Factory::template create(logger_name); 37 | } 38 | 39 | template 40 | inline std::shared_ptr stdout_color_st(const std::string &logger_name) 41 | { 42 | return Factory::template create(logger_name); 43 | } 44 | 45 | template 46 | inline std::shared_ptr stderr_color_mt(const std::string &logger_name) 47 | { 48 | return Factory::template create(logger_name); 49 | } 50 | 51 | template 52 | inline std::shared_ptr stderr_color_st(const std::string &logger_name) 53 | { 54 | return Factory::template create(logger_name); 55 | } 56 | } // namespace spdlog 57 | -------------------------------------------------------------------------------- /source/reads/Purity.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 
5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | #pragma once 22 | 23 | #include 24 | #include 25 | 26 | std::vector> ShiftUnits(const std::vector& units); 27 | 28 | double MatchRepeatRc( 29 | const std::vector>& units_shifts, const std::string& bases, const std::string& quals, 30 | size_t min_baseq = 20); 31 | 32 | double MatchRepeat( 33 | const std::vector>& units_shifts, const std::string& bases, const std::string& quals, 34 | size_t& match_offset, size_t min_baseq = 20); 35 | 36 | double MatchRepeat( 37 | const std::vector& units, const std::string& bases, const std::string& quals, size_t min_baseq = 20); 38 | 39 | double MatchUnits( 40 | const std::vector& units, std::string::const_iterator bases_start, 41 | std::string::const_iterator bases_end, std::string::const_iterator quals_start, 42 | std::string::const_iterator quals_end, size_t min_baseq = 20); 43 | 44 | std::vector> ShiftUnits(const std::vector& units); 45 | -------------------------------------------------------------------------------- /examples/case-control/example_dataset.multisample_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "Counts": { 3 | "AGC": { 4 | "IrrPairCounts": { 5 | "sample2": 2, 6 | "sample3": 1 7 | }, 8 | "RegionsWithIrrAnchors": { 9 | "StrA:1723-2123": { 10 | "sample1": 
8, 11 | "sample2": 32, 12 | "sample3": 29, 13 | "sample4": 5, 14 | "sample5": 9 15 | } 16 | } 17 | }, 18 | "CCG": { 19 | "IrrPairCounts": { 20 | "sample1": 39, 21 | "sample2": 46, 22 | "sample3": 35, 23 | "sample4": 1, 24 | "sample5": 4, 25 | "sample6": 1, 26 | "sample7": 3 27 | }, 28 | "RegionsWithIrrAnchors": { 29 | "StrB:1684-2154": { 30 | "sample1": 21, 31 | "sample2": 22, 32 | "sample3": 30, 33 | "sample4": 28, 34 | "sample5": 19, 35 | "sample6": 20, 36 | "sample7": 19 37 | } 38 | } 39 | } 40 | }, 41 | "Parameters": { 42 | "Depths": { 43 | "sample1": 37.81827259111333, 44 | "sample2": 38.45481777333999, 45 | "sample3": 38.6045931103345, 46 | "sample4": 37.74338492261607, 47 | "sample5": 37.593609585621564, 48 | "sample6": 37.51872191712431, 49 | "sample7": 37.33150274588118 50 | }, 51 | "ReadLengths": { 52 | "sample1": 150, 53 | "sample2": 150, 54 | "sample3": 150, 55 | "sample4": 150, 56 | "sample5": 150, 57 | "sample6": 150, 58 | "sample7": 150 59 | } 60 | } 61 | } -------------------------------------------------------------------------------- /source/Makefile: -------------------------------------------------------------------------------- 1 | CC=/illumina/thirdparty/gcc/gcc-4.9.2/bin/g++ -O3 -std=c++11 -static 2 | 3 | BOOST_BASE=/illumina/thirdparty/boost/boost_1_54_0_python2.7/ 4 | 5 | BOOST_INC_DIR=${BOOST_BASE}/include 6 | BOOST_LIB_DIR=${BOOST_BASE}/lib 7 | BOOST_LIB_SPEC=-lboost_program_options -lboost_filesystem -lboost_system -lboost_regex -lboost_date_time -lpthread 8 | 9 | SAMTOOLS_BASE=~/popdata/progs/htslib/ 10 | SAMTOOLS_INC_DIR=${SAMTOOLS_BASE} 11 | SAMTOOLS_LIB_DIR=${SAMTOOLS_BASE} 12 | # actually -lhts -lz BUT -lz already specified separately 13 | SAMTOOLS_LIB_SPEC=-lhts 14 | 15 | COMPR_LIB_SPEC=-lz 16 | 17 | SRC_DIR=src 18 | INC_DIR=-I. 
-Ithird_party 19 | LIB_DIR=src 20 | INSTALL_DIR=bin 21 | BUILD_DIR=local-build 22 | 23 | LIBS=${BUILD_DIR}/region.o ${BUILD_DIR}/parameters.o \ 24 | ${BUILD_DIR}/purity.o ${BUILD_DIR}/pair_collector.o \ 25 | ${BUILD_DIR}/ref_genome.o ${BUILD_DIR}/bam_file.o \ 26 | ${BUILD_DIR}/irr_finder.o ${BUILD_DIR}/common.o \ 27 | ${BUILD_DIR}/classifiers.o 28 | 29 | all: ${INSTALL_DIR} ${INSTALL_DIR}/ExpansionHunterDenovo 30 | 31 | ${BUILD_DIR}: 32 | mkdir -p ${BUILD_DIR} 33 | 34 | ${BUILD_DIR}/%.o: common/%.cc 35 | ${CC} -c -o $@ $< ${INC_DIR} 36 | 37 | ${BUILD_DIR}/%.o: purity/%.cc 38 | ${CC} -c -o $@ $< ${INC_DIR} 39 | 40 | ${BUILD_DIR}/%.o: regions/%.cc 41 | ${CC} -c -o $@ $< -I ${BOOST_INC_DIR} ${INC_DIR} 42 | 43 | ${BUILD_DIR}/%.o: classification/%.cc 44 | ${CC} -c -o $@ $< -I ${BOOST_INC_DIR} ${INC_DIR} 45 | 46 | ${BUILD_DIR}/%.o: pair_collector/%.cc 47 | ${CC} -c -o $@ $< ${INC_DIR} 48 | 49 | ${BUILD_DIR}/%.o: ${LIB_DIR}/%.cc ${BUILD_DIR} 50 | ${CC} -c -o $@ $< -I ${SAMTOOLS_INC_DIR} -I ${BOOST_INC_DIR} ${INC_DIR} 51 | 52 | ${INSTALL_DIR}: 53 | mkdir -p ${INSTALL_DIR} 54 | 55 | ${INSTALL_DIR}/ExpansionHunterDenovo: ${SRC_DIR}/expansion_hunter_denovo.cc ${LIBS} 56 | ${CC} -o $@ $^ -I ${SAMTOOLS_INC_DIR} -I ${BOOST_INC_DIR} ${INC_DIR} -L ${SAMTOOLS_LIB_DIR} -L ${BOOST_LIB_DIR} ${SAMTOOLS_LIB_SPEC} ${COMPR_LIB_SPEC} ${BOOST_LIB_SPEC} 57 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/patch/1.63.0/context_0001_arm64_cpu.patch: -------------------------------------------------------------------------------- 1 | From 26b61a67cf1d384796e5ae2f207c5b6fa56015e5 Mon Sep 17 00:00:00 2001 2 | From: Oliver Kowalke 3 | Date: Thu, 5 Jan 2017 10:38:47 -0800 4 | Subject: [PATCH] remove directive '.cpu' for ARM64/AAPCS/ELF 5 | 6 | --- 7 | src/asm/jump_arm64_aapcs_elf_gas.S | 1 - 8 | src/asm/make_arm64_aapcs_elf_gas.S | 1 - 9 | src/asm/ontop_arm64_aapcs_elf_gas.S | 1 - 10 | 3 files changed, 3 deletions(-) 11 | 12 | diff 
--git a/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S b/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S 13 | index 7c0c2fa..1b8ce9e 100644 14 | --- a/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S 15 | +++ b/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S 16 | @@ -51,7 +51,6 @@ 17 | * * 18 | *******************************************************/ 19 | 20 | -.cpu generic+fp+simd 21 | .text 22 | .align 2 23 | .global jump_fcontext 24 | diff --git a/libs/context/src/asm/make_arm64_aapcs_elf_gas.S b/libs/context/src/asm/make_arm64_aapcs_elf_gas.S 25 | index e71a91c..c1fa843 100644 26 | --- a/libs/context/src/asm/make_arm64_aapcs_elf_gas.S 27 | +++ b/libs/context/src/asm/make_arm64_aapcs_elf_gas.S 28 | @@ -51,7 +51,6 @@ 29 | * * 30 | *******************************************************/ 31 | 32 | -.cpu generic+fp+simd 33 | .text 34 | .align 2 35 | .global make_fcontext 36 | diff --git a/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S b/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S 37 | index 7e3b047..02a3b07 100644 38 | --- a/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S 39 | +++ b/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S 40 | @@ -51,7 +51,6 @@ 41 | * * 42 | *******************************************************/ 43 | 44 | -.cpu generic+fp+simd 45 | .text 46 | .align 2 47 | .global ontop_fcontext 48 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/context.cmake: -------------------------------------------------------------------------------- 1 | if(USE_WINDOWS) 2 | check_preprocessor(USE_X86 _M_IX86) 3 | check_preprocessor(USE_GNUC __GNUC__) 4 | if(USE_X86) 5 | set(context_arch i386) 6 | else() 7 | set(context_arch x86_64) 8 | endif() 9 | 10 | if(USE_GNUC) 11 | # MinGW 12 | enable_language(ASM) 13 | set(context_assembler gas) 14 | else() 15 | enable_language(ASM_MASM) 16 | set(context_assembler masm) 17 | endif() 18 | 19 | set(context_srcs 20 | 
${BOOST_SOURCE}/libs/context/src/asm/jump_${context_arch}_ms_pe_${context_assembler}.asm 21 | ${BOOST_SOURCE}/libs/context/src/asm/make_${context_arch}_ms_pe_${context_assembler}.asm 22 | ${BOOST_SOURCE}/libs/context/src/asm/ontop_${context_arch}_ms_pe_${context_assembler}.asm 23 | ${BOOST_SOURCE}/libs/context/src/windows/stack_traits.cpp 24 | ) 25 | else() 26 | # Using combined sources in order to support macOS / iOS universal builds 27 | # It makes it super easy to extend to new platforms too 28 | set(context_srcs 29 | ${CMAKE_CURRENT_LIST_DIR}/context/jump_combined.S 30 | ${CMAKE_CURRENT_LIST_DIR}/context/make_combined.S 31 | ${CMAKE_CURRENT_LIST_DIR}/context/ontop_combined.S 32 | ${BOOST_SOURCE}/libs/context/src/posix/stack_traits.cpp 33 | ) 34 | enable_language(ASM) 35 | endif() 36 | 37 | _add_boost_lib( 38 | NAME context 39 | SOURCES 40 | ${context_srcs} 41 | DEFINE_PRIVATE 42 | BOOST_CONTEXT_SOURCE=1 43 | BOOST_CONTEXT_EXPORT 44 | LINK 45 | Boost::thread 46 | ) 47 | 48 | _add_boost_test( 49 | NAME context_test 50 | LINK 51 | Boost::context 52 | Boost::unit_test_framework 53 | TESTS 54 | RUN ${BOOST_SOURCE}/libs/context/test/test_invoke.cpp 55 | RUN ${BOOST_SOURCE}/libs/context/test/test_apply.cpp 56 | RUN ${BOOST_SOURCE}/libs/context/test/test_fcontext.cpp 57 | RUN ${BOOST_SOURCE}/libs/context/test/test_fiber.cpp 58 | RUN ${BOOST_SOURCE}/libs/context/test/test_callcc.cpp 59 | RUN ${BOOST_SOURCE}/libs/context/test/test_execution_context_v2.cpp 60 | ) 61 | -------------------------------------------------------------------------------- /source/reads/IrrFinder.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 
11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | // Code to detect sequences (in-repeat reads) that consist of repetitions of some short string (repeat unit). For 23 | // example, CGGCGGCGGCGG is an in-repeat read with repeat unit CGG. 24 | 25 | #pragma once 26 | 27 | #include 28 | 29 | #include "common/Interval.hh" 30 | 31 | int MaxMatchesAtOffset(int offset, const std::string& bases); 32 | double MatchFrequencyAtOffset(int offset, const std::string& bases); 33 | int SmallestFrequentPeriod( 34 | double minFrequency, const std::string& bases, const Interval& periodSizeRange = Interval(1, 20)); 35 | char ExtractConsensusBase(int32_t offset, int32_t period, const std::string& bases); 36 | std::string ExtractConsensusRepeatUnit(double period, const std::string& bases); 37 | std::string MinimialUnitUnderShift(const std::string& unit); 38 | std::string ComputeCanonicalRepeatUnit(const std::string& unit); 39 | std::string ComputeCanonicalRepeatUnit( 40 | double minFrequency, const std::string& bases, const Interval& motifSizeRange = Interval(1, 20)); 41 | bool IsInrepeatRead( 42 | const std::string& bases, const std::string& quals, std::string& unit, 43 | const Interval& motifSizeRange = Interval(1, 20)); 44 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should override the sink_it_() and flush_() methods. 10 | // locking is taken care of in this class - no locking needed by the 11 | // implementers.. 12 | // 13 | 14 | #include "spdlog/common.h" 15 | #include "spdlog/details/log_msg.h" 16 | #include "spdlog/formatter.h" 17 | #include "spdlog/sinks/sink.h" 18 | 19 | namespace spdlog { 20 | namespace sinks { 21 | template 22 | class base_sink : public sink 23 | { 24 | public: 25 | base_sink() = default; 26 | base_sink(const base_sink &) = delete; 27 | base_sink &operator=(const base_sink &) = delete; 28 | 29 | void log(const details::log_msg &msg) final 30 | { 31 | std::lock_guard lock(mutex_); 32 | sink_it_(msg); 33 | } 34 | 35 | void flush() final 36 | { 37 | std::lock_guard lock(mutex_); 38 | flush_(); 39 | } 40 | 41 | void set_pattern(const std::string &pattern) final 42 | { 43 | std::lock_guard lock(mutex_); 44 | set_pattern_(pattern); 45 | } 46 | 47 | void set_formatter(std::unique_ptr sink_formatter) final 48 | { 49 | std::lock_guard lock(mutex_); 50 | set_formatter_(std::move(sink_formatter)); 51 | } 52 | 53 | protected: 54 | virtual void sink_it_(const details::log_msg &msg) = 0; 55 | virtual void flush_() = 0; 56 | 57 | virtual void set_pattern_(const std::string &pattern) 58 | { 59 | set_formatter_(details::make_unique(pattern)); 60 | } 61 | 62 | virtual void set_formatter_(std::unique_ptr sink_formatter) 63 | { 64 | formatter_ = std::move(sink_formatter); 65 | } 66 | Mutex mutex_; 67 | }; 68 | } // namespace sinks 69 | } // namespace spdlog 70 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/basic_file_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015-2018 Gabi 
Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #include "spdlog/spdlog.h" 10 | #endif 11 | 12 | #include "spdlog/details/file_helper.h" 13 | #include "spdlog/details/null_mutex.h" 14 | #include "spdlog/sinks/base_sink.h" 15 | 16 | #include 17 | #include 18 | 19 | namespace spdlog { 20 | namespace sinks { 21 | /* 22 | * Trivial file sink with single file as target 23 | */ 24 | template 25 | class basic_file_sink final : public base_sink 26 | { 27 | public: 28 | explicit basic_file_sink(const filename_t &filename, bool truncate = false) 29 | { 30 | file_helper_.open(filename, truncate); 31 | } 32 | 33 | protected: 34 | void sink_it_(const details::log_msg &msg) override 35 | { 36 | fmt::memory_buffer formatted; 37 | sink::formatter_->format(msg, formatted); 38 | file_helper_.write(formatted); 39 | } 40 | 41 | void flush_() override 42 | { 43 | file_helper_.flush(); 44 | } 45 | 46 | private: 47 | details::file_helper file_helper_; 48 | }; 49 | 50 | using basic_file_sink_mt = basic_file_sink; 51 | using basic_file_sink_st = basic_file_sink; 52 | 53 | } // namespace sinks 54 | 55 | // 56 | // factory functions 57 | // 58 | template 59 | inline std::shared_ptr basic_logger_mt(const std::string &logger_name, const filename_t &filename, bool truncate = false) 60 | { 61 | return Factory::template create(logger_name, filename, truncate); 62 | } 63 | 64 | template 65 | inline std::shared_ptr basic_logger_st(const std::string &logger_name, const filename_t &filename, bool truncate = false) 66 | { 67 | return Factory::template create(logger_name, filename, truncate); 68 | } 69 | 70 | } // namespace spdlog 71 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/details/periodic_worker.h: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // Copyright(c) 2018 Gabi Melman. 
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
//

#pragma once

// periodic worker thread - periodically executes the given callback function.
//
// RAII over the owned thread:
//    creates the thread on construction.
//    stops and joins the thread on destruction (if the thread is executing a callback, wait for it to finish first).

#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>
namespace spdlog {
namespace details {

class periodic_worker
{
public:
    // Starts a thread that calls `callback_fun` once per `interval`.
    // A zero or negative interval leaves the worker inactive: no thread is
    // created and the callback is never invoked.
    periodic_worker(const std::function<void()> &callback_fun, std::chrono::seconds interval)
    {
        active_ = (interval > std::chrono::seconds::zero());
        if (!active_)
        {
            return;
        }

        // The callback and interval are captured by value so the thread
        // does not depend on the lifetime of the constructor arguments.
        worker_thread_ = std::thread([this, callback_fun, interval]() {
            for (;;)
            {
                std::unique_lock<std::mutex> lock(this->mutex_);
                // wait_for returns true only when the predicate holds, i.e.
                // the destructor cleared active_; a plain timeout returns
                // false and falls through to the callback.
                if (this->cv_.wait_for(lock, interval, [this] { return !this->active_; }))
                {
                    return; // active_ == false, so exit this thread
                }
                // Runs with mutex_ still held, which is what makes the
                // destructor wait for a callback in progress.
                callback_fun();
            }
        });
    }

    periodic_worker(const periodic_worker &) = delete;
    periodic_worker &operator=(const periodic_worker &) = delete;

    // stop the worker thread and join it
    ~periodic_worker()
    {
        if (worker_thread_.joinable())
        {
            {
                // Flip the flag under the lock so the waiting thread cannot
                // miss the change between its predicate check and its wait.
                std::lock_guard<std::mutex> lock(mutex_);
                active_ = false;
            }
            cv_.notify_one();
            worker_thread_.join();
        }
    }

private:
    bool active_;
    std::thread worker_thread_;
    std::mutex mutex_;
    std::condition_variable cv_;
};
} // namespace details
} // namespace spdlog
5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #include "ReadClassification.hh" 23 | #include "reads/IrrFinder.hh" 24 | 25 | using std::string; 26 | 27 | ReadType classifyRead(Interval motifSizeRange, int max_irr_mapq, int min_anchor_mapq, const Read& read, string& unit) 28 | { 29 | const bool is_unmapped = read.flag & 0x4; 30 | const bool is_low_mapq = read.mapq <= max_irr_mapq; 31 | 32 | const bool is_irr = (is_unmapped || is_low_mapq) && IsInrepeatRead(read.bases, read.quals, unit, motifSizeRange); 33 | 34 | if (is_irr) 35 | { 36 | return ReadType::kIrrRead; 37 | } 38 | 39 | if (read.mapq >= min_anchor_mapq) 40 | { 41 | return ReadType::kAnchorRead; 42 | } 43 | 44 | return ReadType::kOtherRead; 45 | } 46 | 47 | PairType classifyPair(ReadType read_type, const string& read_unit, ReadType mate_type, const string& mate_unit) 48 | { 49 | if ((read_type == ReadType::kAnchorRead && mate_type == ReadType::kIrrRead) 50 | || (read_type == ReadType::kIrrRead && mate_type == ReadType::kAnchorRead)) 51 | { 52 | return PairType::kIrrAnchorPair; 53 | } 54 | 55 | if (read_type == ReadType::kIrrRead && mate_type == ReadType::kIrrRead && read_unit == mate_unit) 56 | { 57 | return PairType::kIrrIrrPair; 58 | } 59 | 60 | return PairType::kOtherPair; 61 | } 62 | 
-------------------------------------------------------------------------------- /source/profile/SampleRunStats.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | #include "region/ReferenceContigInfo.hh" 32 | 33 | class SampleRunStats 34 | { 35 | public: 36 | SampleRunStats(int meanReadLength, double depth) 37 | : meanReadLength_(meanReadLength) 38 | , depth_(depth) 39 | { 40 | } 41 | 42 | int meanReadLength() const { return meanReadLength_; } 43 | double depth() const { return depth_; } 44 | 45 | bool operator==(const SampleRunStats& other) const; 46 | 47 | private: 48 | int meanReadLength_; 49 | double depth_; 50 | }; 51 | 52 | std::ostream& operator<<(std::ostream& out, const SampleRunStats& stats); 53 | 54 | // Computes read and coverage statistics for each locus from reads aligning to the flanks 55 | class SampleRunStatsCalculator 56 | { 57 | public: 58 | explicit SampleRunStatsCalculator(ReferenceContigInfo contigInfo); 59 | 60 | void inspect(int contigId, int readLength); 61 | 62 | boost::optional estimate() const; 63 | 64 | private: 65 | ReferenceContigInfo contigInfo_; 66 | 67 | std::unordered_map contigIdToReadCount; 68 | int64_t totalReadCount; 69 | int64_t sumOfReadLengths; 70 | }; 71 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/libs/exception.cmake: -------------------------------------------------------------------------------- 1 | _add_boost_lib( 2 | NAME exception 3 | SOURCES 4 | ${BOOST_SOURCE}/libs/exception/src/clone_current_exception_non_intrusive.cpp 5 | ) 6 | 7 | _add_boost_test( 8 | NAME exception_test 9 | LINK 10 | Boost::exception 11 | Boost::thread 12 | TESTS 13 | RUN ${BOOST_SOURCE}/libs/exception/test/is_output_streamable_test.cpp 14 | RUN ${BOOST_SOURCE}/libs/exception/test/has_to_string_test.cpp 15 | RUN ${BOOST_SOURCE}/libs/exception/test/to_string_test.cpp 16 | RUN ${BOOST_SOURCE}/libs/exception/test/to_string_stub_test.cpp 17 | RUN ${BOOST_SOURCE}/libs/exception/test/1-throw_exception_test.cpp 18 | RUN 
${BOOST_SOURCE}/libs/exception/test/2-throw_exception_no_exceptions_test.cpp 19 | RUN ${BOOST_SOURCE}/libs/exception/test/3-throw_exception_no_integration_test.cpp 20 | RUN ${BOOST_SOURCE}/libs/exception/test/4-throw_exception_no_both_test.cpp 21 | RUN ${BOOST_SOURCE}/libs/exception/test/cloning_test.cpp 22 | RUN ${BOOST_SOURCE}/libs/exception/test/copy_exception_test.cpp 23 | RUN ${BOOST_SOURCE}/libs/exception/test/unknown_exception_test.cpp 24 | RUN ${BOOST_SOURCE}/libs/exception/test/exception_test.cpp 25 | RUN ${BOOST_SOURCE}/libs/exception/test/enable_error_info_test.cpp 26 | ${BOOST_SOURCE}/libs/exception/test/helper1.cpp 27 | RUN ${BOOST_SOURCE}/libs/exception/test/throw_exception_test.cpp 28 | ${BOOST_SOURCE}/libs/exception/test/helper2.cpp 29 | # Fails to build on Android x86 30 | # RUN ${BOOST_SOURCE}/libs/exception/test/errno_test.cpp 31 | RUN ${BOOST_SOURCE}/libs/exception/test/error_info_lv_test.cpp 32 | RUN ${BOOST_SOURCE}/libs/exception/test/error_info_lv_const_test.cpp 33 | RUN ${BOOST_SOURCE}/libs/exception/test/error_info_rv_test.cpp 34 | RUN ${BOOST_SOURCE}/libs/exception/test/error_info_rv_const_test.cpp 35 | RUN ${BOOST_SOURCE}/libs/exception/test/diagnostic_information_test.cpp 36 | RUN ${BOOST_SOURCE}/libs/exception/test/refcount_ptr_test.cpp 37 | RUN ${BOOST_SOURCE}/libs/exception/test/current_exception_cast_test.cpp 38 | RUN ${BOOST_SOURCE}/libs/exception/test/errinfos_test.cpp 39 | RUN ${BOOST_SOURCE}/libs/exception/test/exception_ptr_test.cpp 40 | ) 41 | -------------------------------------------------------------------------------- /scripts/tests/test_wilcoxon.py: -------------------------------------------------------------------------------- 1 | # 2 | # ExpansionHunter Denovo 3 | # Copyright 2016-2019 Illumina, Inc. 4 | # All rights reserved. 
#
# Author: Egor Dolzhenko ,
#         Michael Eberle
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#

import sys
import pytest

import numpy as np

from core.wilcoxontest import calculate_approximate_pvalue
from core.wilcoxontest import calculate_permutation_pvalue

# The permutation p-value is a Monte Carlo estimate drawn from NumPy's global
# RNG. Seeding it in every setup makes a failing run reproducible.
_RNG_SEED = 0

# Allowed absolute deviation between a computed and a reference p-value.
_PVALUE_TOLERANCE = 0.01


class TestPermutationTest(object):
    """One-sided rank-sum p-value on a data set without tied observations."""

    def setup_method(self, test_method):
        np.random.seed(_RNG_SEED)
        self.cases = [8.50, 9.48, 8.65, 8.16, 8.83, 7.76, 8.63]
        self.controls = [8.27, 8.20, 8.25, 8.14, 9.00, 8.10, 7.20, 8.32, 7.70]
        self.expected_case_rank_sum = 75
        self.expected_pvalue = 0.057

    def test_permutation_pvalue(self):
        pvalue = calculate_permutation_pvalue(
            self.cases, self.controls, num_permutations=100000
        )

        assert abs(pvalue - self.expected_pvalue) < _PVALUE_TOLERANCE


class TestPermutationTestWithTies(object):
    """Same checks on a data set containing tied observations."""

    def setup_method(self, test_method):
        np.random.seed(_RNG_SEED)
        self.cases = [0.45, 0.50, 0.61, 0.63, 0.75, 0.85, 0.93]
        self.controls = [0.44, 0.45, 0.52, 0.53, 0.56, 0.58, 0.58, 0.65, 0.79]
        self.expected_case_rank_sum = 71.5
        self.expected_pvalue = 0.105

    def test_permutation_pvalue(self):
        pvalue = calculate_permutation_pvalue(
            self.cases, self.controls, num_permutations=100000
        )

        assert abs(pvalue - self.expected_pvalue) < _PVALUE_TOLERANCE

    def test_approximate_pvalue(self):
        pvalue = calculate_approximate_pvalue(self.cases, self.controls)
        assert abs(pvalue - self.expected_pvalue) < _PVALUE_TOLERANCE
-------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/AddBoostTest.cmake: -------------------------------------------------------------------------------- 1 | function(_add_boost_test) 2 | if(NOT BOOST_STANDALONE OR BOOST_DISABLE_TESTS) 3 | return() 4 | endif() 5 | 6 | set(options) 7 | set(oneValueArgs NAME) 8 | set(multiValueArgs LINK DEFINE TESTS) 9 | cmake_parse_arguments(BOOSTTEST "${options}" "${oneValueArgs}" 10 | "${multiValueArgs}" ${ARGN}) 11 | 12 | # Split all arguments for TESTS in groups 13 | list(LENGTH BOOSTTEST_TESTS arg_len) 14 | set(arg_start 0) 15 | while(${arg_start} LESS ${arg_len}) 16 | list(GET BOOSTTEST_TESTS ${arg_start} test_command) 17 | math(EXPR arg_start "${arg_start} + 1") 18 | unset(test_files) 19 | 20 | foreach(arg_pos RANGE ${arg_start} ${arg_len}) 21 | set(arg_start ${arg_pos}) 22 | if(arg_start EQUAL ${arg_len}) 23 | break() 24 | endif() 25 | 26 | list(GET BOOSTTEST_TESTS ${arg_pos} arg) 27 | if(arg STREQUAL "RUN") 28 | break() 29 | else() 30 | list(APPEND test_files ${arg}) 31 | endif() 32 | endforeach() 33 | 34 | if(test_command STREQUAL "RUN") 35 | if(NOT test_files) 36 | message(FATAL_ERROR "Missing files for test command ${test_command}") 37 | endif() 38 | list(GET test_files 0 main_src) 39 | get_filename_component(source_name ${main_src} NAME_WE) 40 | set(test_name Boost_${BOOSTTEST_NAME}_${source_name}) 41 | add_executable(${test_name} ${test_files}) 42 | add_test(NAME ${test_name} 43 | COMMAND ${test_name} 44 | WORKING_DIRECTORY ${BOOST_SOURCE}/status 45 | ) 46 | set_target_properties(${test_name} PROPERTIES 47 | FOLDER "Boost/Tests" 48 | ) 49 | if(MSVC) 50 | target_compile_options(${test_name} PRIVATE /bigobj) 51 | endif() 52 | if(NOT BOOST_STANDALONE) 53 | set_target_properties(${test_name} PROPERTIES EXCLUDE_FROM_ALL 1) 54 | endif() 55 | if(BOOSTTEST_DEFINE) 56 | target_compile_definitions(${test_name} PUBLIC ${BOOSTTEST_DEFINE}) 57 | endif() 58 | 
if(BOOSTTEST_LINK) 59 | target_link_libraries(${test_name} PUBLIC ${BOOSTTEST_LINK}) 60 | endif() 61 | else() 62 | message(FATAL_ERROR "Unknown test command: ${test_command}") 63 | endif() 64 | endwhile() 65 | endfunction() 66 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/cmake/Modules/FindIconv.cmake: -------------------------------------------------------------------------------- 1 | function(_iconv_find) 2 | if(ICONV_ROOT) 3 | list(APPEND iconv_roots ${ICONV_ROOT}) 4 | else() 5 | if(NOT "$ENV{ICONV_ROOT}" STREQUAL "") 6 | file(TO_CMAKE_PATH "$ENV{ICONV_ROOT}" NATIVE_PATH) 7 | list(APPEND iconv_roots "${NATIVE_PATH}") 8 | set(ICONV_ROOT "${NATIVE_PATH}" 9 | CACHE PATH "Location of the Iconv installation" FORCE) 10 | endif() 11 | endif() 12 | 13 | list(APPEND iconv_library_suffixes "lib") 14 | list(APPEND iconv_include_suffixes "include") 15 | 16 | find_path(ICONV_INCLUDE_DIR 17 | NAMES "iconv.h" 18 | HINTS ${iconv_roots} 19 | PATH_SUFFIXES ${iconv_include_suffixes} 20 | DOC "Iconv include directory") 21 | set(ICONV_INCLUDE_DIR "${ICONV_INCLUDE_DIR}" PARENT_SCOPE) 22 | 23 | find_library(ICONV_LIBRARY 24 | NAMES 25 | iconv 26 | libiconv 27 | libiconv2 28 | HINTS ${iconv_roots} 29 | PATH_SUFFIXES ${iconv_library_suffixes} 30 | DOC "Iconv library") 31 | set(ICONV_LIBRARY "${ICONV_LIBRARY}" PARENT_SCOPE) 32 | 33 | if(ICONV_INCLUDE_DIR AND NOT ICONV_LIBRARY) 34 | include(CheckFunctionExists) 35 | check_function_exists(iconv HAVE_ICONV_IN_LIBC) 36 | if(HAVE_ICONV_IN_LIBC) 37 | set(HAVE_ICONV_IN_LIBC "${HAVE_ICONV_IN_LIBC}" PARENT_SCOPE) 38 | set(ICONV_LIBRARY "integrated in standard library" PARENT_SCOPE) 39 | endif() 40 | endif() 41 | 42 | if(ICONV_INCLUDE_DIR AND ICONV_LIBRARY) 43 | set(ICONV_FOUND ON PARENT_SCOPE) 44 | endif() 45 | endfunction() 46 | 47 | _iconv_find() 48 | 49 | if(ICONV_FOUND) 50 | if(NOT ICONV_FIND_QUIETLY) 51 | message(STATUS "Found iconv library: ${ICONV_LIBRARY}") 52 | endif() 
53 | 54 | if(HAVE_ICONV_IN_LIBC) 55 | set(_lib_type INTERFACE) 56 | else() 57 | set(_lib_type UNKNOWN) 58 | endif() 59 | 60 | add_library(Iconv::Iconv ${_lib_type} IMPORTED) 61 | set_target_properties(Iconv::Iconv PROPERTIES 62 | INTERFACE_INCLUDE_DIRECTORIES "${ICONV_INCLUDE_DIR}") 63 | 64 | if(NOT HAVE_ICONV_IN_LIBC) 65 | set_target_properties(Iconv::Iconv PROPERTIES 66 | IMPORTED_LOCATION "${ICONV_LIBRARY}") 67 | endif() 68 | 69 | unset(_lib_type) 70 | else() 71 | if(ICONV_FIND_REQUIRED) 72 | message(FATAL_ERROR "Could NOT find iconv library") 73 | endif() 74 | endif() 75 | 76 | mark_as_advanced(ICONV_LIBRARY ICONV_INCLUDE_DIR) 77 | -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ -------------------------------------------------------------------------------- /source/profile/ProfileParameters.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 
4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include 25 | 26 | #include 27 | 28 | #include "common/Interval.hh" 29 | 30 | class ProfileWorkflowParameters 31 | { 32 | public: 33 | ProfileWorkflowParameters( 34 | const std::string& outputPrefix, bool logReads, std::string pathToReads, std::string pathToReference, 35 | Interval motifSizeRange, int minMapqOfAnchorRead, int maxMapqOfInrepeatRead); 36 | 37 | const std::string& profilePath() const { return profilePath_; } 38 | const std::string& pathToLocusTable() const { return pathToLocusTable_; } 39 | const std::string& pathToMotifTable() const { return pathToMotifTable_; } 40 | const std::string& pathToReads() const { return pathToReads_; } 41 | const std::string& pathToReference() const { return pathToReference_; } 42 | const boost::optional& pathToReadLog() const { return pathToReadLog_; } 43 | const Interval& motifSizeRange() const { return motifSizeRange_; } 44 | int minMapqOfAnchorRead() const { return minMapqOfAnchorRead_; } 45 | int maxMapqOfInrepeatRead() const { return maxMapqOfInrepeatRead_; } 46 | 47 | private: 48 | std::string profilePath_; 49 | std::string pathToLocusTable_; 50 | std::string pathToMotifTable_; 51 | std::string pathToReads_; 52 | std::string pathToReference_; 53 | 
boost::optional pathToReadLog_; 54 | Interval motifSizeRange_; 55 | int minMapqOfAnchorRead_; 56 | int maxMapqOfInrepeatRead_; 57 | }; 58 | 59 | void assertValidity(const ProfileWorkflowParameters& parameters); 60 | -------------------------------------------------------------------------------- /scripts/core/wilcoxontest.py: -------------------------------------------------------------------------------- 1 | # 2 | # ExpansionHunter Denovo 3 | # Copyright 2016-2019 Illumina, Inc. 4 | # All rights reserved. 5 | # 6 | # Author: Egor Dolzhenko , 7 | # Michael Eberle 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 
import numpy as np
import scipy.special
import scipy.stats

# Upper bound on the number of random keys generated per batch while
# permuting; keeps peak memory bounded regardless of num_permutations.
_MAX_KEYS_PER_BATCH = 10 ** 6


def _pooled_ranks(cases, controls):
    """Return the ranks of the pooled observations, cases first.

    The two groups are concatenated explicitly so that lists, tuples and
    NumPy arrays all behave the same way (``cases + controls`` would add
    arrays element-wise instead of joining them). Tied values receive the
    average rank, which is the default of ``scipy.stats.rankdata``.
    """
    pooled = np.concatenate(
        [np.asarray(cases, dtype=float), np.asarray(controls, dtype=float)]
    )
    return scipy.stats.rankdata(pooled)


def calculate_approximate_pvalue(cases, controls):
    """Return the one-sided rank-sum p-value from the normal approximation.

    The statistic is the sum of the ranks of ``cases`` within the pooled
    sample; the p-value is the upper tail, so it is small when cases tend to
    be larger than controls. The variance formula used here carries no tie
    correction.

    Args:
        cases: Sequence of numeric observations for the case group.
        controls: Sequence of numeric observations for the control group.

    Returns:
        The upper-tail probability of the standardized case rank sum.
    """
    ranks = _pooled_ranks(cases, controls)
    num_cases = len(cases)
    num_controls = len(controls)
    num_counts = num_cases + num_controls
    case_rank_sum = np.sum(ranks[:num_cases])

    mu_cases = num_cases * (num_counts + 1) / 2
    sigma_cases = np.sqrt(num_cases * num_controls * (num_counts + 1) / 12)
    z_cases = (case_rank_sum - mu_cases) / sigma_cases

    # sf(z) equals 1 - cdf(z) but keeps precision for very small p-values.
    return scipy.stats.norm.sf(z_cases)


def calculate_permutation_pvalue(cases, controls, num_permutations):
    """Return the one-sided rank-sum p-value estimated by random permutation.

    Each permutation reassigns the case label to ``len(cases)`` distinct
    pooled observations (sampling WITHOUT replacement) and records the
    resulting case rank sum. The p-value is the add-one estimate
    ``(k + 1) / (num_permutations + 1)``, where ``k`` counts permuted sums at
    least as large as the observed one.

    Randomness comes from NumPy's global RNG, so ``np.random.seed`` makes the
    result reproducible.

    Args:
        cases: Sequence of numeric observations for the case group.
        controls: Sequence of numeric observations for the control group.
        num_permutations: Number of random label permutations to draw.

    Returns:
        The estimated upper-tail p-value, in (0, 1].
    """
    ranks = _pooled_ranks(cases, controls)
    num_cases = len(cases)
    true_case_rank_sum = np.sum(ranks[:num_cases])

    batch_size = max(1, _MAX_KEYS_PER_BATCH // max(ranks.size, 1))
    num_as_extreme_as_true = 0

    for start in range(0, num_permutations, batch_size):
        rows = min(batch_size, num_permutations - start)
        # Sorting i.i.d. uniform keys yields a uniformly random permutation
        # of the column indices per row; its first num_cases entries are a
        # uniformly random subset of distinct observations.
        keys = np.random.random_sample((rows, ranks.size))
        case_indices = np.argsort(keys, axis=1)[:, :num_cases]
        permuted_case_rank_sums = ranks[case_indices].sum(axis=1)
        num_as_extreme_as_true += int(
            np.count_nonzero(permuted_case_rank_sums >= true_case_rank_sum)
        )

    return (num_as_extreme_as_true + 1) / (num_permutations + 1)


def wilcoxon_rank_sum_test(test_params, cases, controls):
    """Dispatch to the requested rank-sum p-value method.

    Args:
        test_params: Sequence whose first item is the method name:
            ``"normal"`` for the normal approximation, or ``"permute"``
            followed by the number of permutations.
        cases: Sequence of numeric observations for the case group.
        controls: Sequence of numeric observations for the control group.

    Returns:
        The p-value computed by the selected method.

    Raises:
        AssertionError: If the method name is not recognized. It is raised
            explicitly so the check also fires under ``python -O``.
    """
    method, *params = test_params
    if method == "normal":
        return calculate_approximate_pvalue(cases, controls)
    if method == "permute":
        num_perms = params[0]
        return calculate_permutation_pvalue(cases, controls, num_perms)
    raise AssertionError("{} is an unknown method type".format(method))
5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | extern "C" 29 | { 30 | #include "htslib/hts.h" 31 | #include "htslib/sam.h" 32 | } 33 | 34 | #include "reads/Read.hh" 35 | #include "region/ReferenceContigInfo.hh" 36 | 37 | class HtsFileStreamer 38 | { 39 | public: 40 | HtsFileStreamer(std::string htsFilePath, std::string referencePath) 41 | : htsFilePath_(std::move(htsFilePath)) 42 | , referencePath_(std::move(referencePath)) 43 | , contigInfo_({}) 44 | { 45 | openHtsFile(); 46 | loadHeader(); 47 | prepareForStreamingAlignments(); 48 | } 49 | ~HtsFileStreamer(); 50 | 51 | const ReferenceContigInfo& contigInfo() const { return contigInfo_; } 52 | 53 | bool trySeekingToNextPrimaryAlignment(); 54 | 55 | int currentReadContigId() const; 56 | int currentReadPosition() const; 57 | int currentReadLength() const; 58 | int currentMateContigId() const; 59 | int currentMatePosition() const; 60 | 61 | bool isStreamingAlignedReads() const; 62 | 63 | Read decodeRead() const; 64 | 65 | private: 66 | enum class Status 67 | { 68 | kStreamingReads, 69 | kFinishedStreaming 70 | }; 71 | 72 | void openHtsFile(); 73 | void loadHeader(); 74 | void prepareForStreamingAlignments(); 75 | 76 | std::string htsFilePath_; 77 | std::string referencePath_; 78 | 
ReferenceContigInfo contigInfo_; 79 | Status status_ = Status::kStreamingReads; 80 | 81 | htsFile* htsFilePtr_ = nullptr; 82 | bam1_t* htsAlignmentPtr_ = nullptr; 83 | bam_hdr_t* htsHeaderPtr_ = nullptr; 84 | }; 85 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/async_logger.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // Very fast asynchronous logger (millions of logs per second on an average 9 | // desktop) 10 | // Uses pre allocated lockfree queue for maximum throughput even under large 11 | // number of threads. 12 | // Creates a single back thread to pop messages from the queue and log them. 13 | // 14 | // Upon each log write the logger: 15 | // 1. Checks if its log level is enough to log the message 16 | // 2. Push a new copy of the message to a queue (or block the caller until 17 | // space is available in the queue) 18 | // 3. will throw spdlog_ex upon log exceptions 19 | // Upon destruction, logs all remaining messages in the queue before 20 | // destructing.. 21 | 22 | #include "spdlog/common.h" 23 | #include "spdlog/logger.h" 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | namespace spdlog { 30 | 31 | // Async overflow policy - block by default. 32 | enum class async_overflow_policy 33 | { 34 | block, // Block until message can be enqueued 35 | overrun_oldest // Discard oldest message in the queue if full when trying to 36 | // add new item. 
37 | }; 38 | 39 | namespace details { 40 | class thread_pool; 41 | } 42 | 43 | class async_logger final : public std::enable_shared_from_this, public logger 44 | { 45 | friend class details::thread_pool; 46 | 47 | public: 48 | template 49 | async_logger(std::string logger_name, It begin, It end, std::weak_ptr tp, 50 | async_overflow_policy overflow_policy = async_overflow_policy::block); 51 | 52 | async_logger(std::string logger_name, sinks_init_list sinks_list, std::weak_ptr tp, 53 | async_overflow_policy overflow_policy = async_overflow_policy::block); 54 | 55 | async_logger(std::string logger_name, sink_ptr single_sink, std::weak_ptr tp, 56 | async_overflow_policy overflow_policy = async_overflow_policy::block); 57 | 58 | std::shared_ptr clone(std::string new_name) override; 59 | 60 | protected: 61 | void sink_it_(details::log_msg &msg) override; 62 | void flush_() override; 63 | 64 | void backend_log_(const details::log_msg &incoming_log_msg); 65 | void backend_flush_(); 66 | 67 | private: 68 | std::weak_ptr thread_pool_; 69 | async_overflow_policy overflow_policy_; 70 | }; 71 | } // namespace spdlog 72 | 73 | #include "details/async_logger_impl.h" 74 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/README.md: -------------------------------------------------------------------------------- 1 | # Boost CMake [![Build Status](https://dev.azure.com/Orphis/boost-cmake/_apis/build/status/Orphis.boost-cmake?branchName=master)](https://dev.azure.com/Orphis/boost-cmake/_build/latest?definitionId=1?branchName=master) 2 | 3 | ## Synopsis 4 | 5 | Easy Boost integration in CMake projects! 6 | 7 | ## Code Example 8 | 9 | Install the submodule in your project structure: 10 | ``` 11 | git submodule add https://github.com/Orphis/boost-cmake.git 12 | ``` 13 | Use it! In your CMakeLists.txt file: 14 | 15 | ``` 16 | add_subdirectory(boost-cmake) 17 | ... 
18 | target_link_libraries(lib_using_filesystem PUBLIC Boost::filesystem) 19 | target_link_libraries(lib_using_header_only PUBLIC Boost::boost) 20 | ``` 21 | 22 | ## Configuration 23 | 24 | Boost will automatically be downloaded from GitHub as a minified archive created with the `repack.sh` script. 25 | 26 | If that is not acceptable to you, you can use an alternate Boost version, apply custom patches or just mirror the current archive in your internal network like so: 27 | ``` 28 | set(BOOST_URL http://internal.mirror/boost.7z) 29 | set(BOOST_URL_SHA256 foobar) 30 | add_subdirectory(boost-cmake) 31 | ``` 32 | 33 | For more advanced configuration, you can override the way to download the sources using [FetchContent_Declare](https://cmake.org/cmake/help/latest/module/FetchContent.html): 34 | ``` 35 | FetchContent_Declare( 36 | Boost 37 | SVN_REPOSITORY "svn+ssh://svn.company.com/boost" 38 | ) 39 | ``` 40 | 41 | If you have Boost sources already available and want to point to them, you can use the following: 42 | ``` 43 | set(FETCHCONTENT_SOURCE_DIR_BOOST /path/to/boost) 44 | add_subdirectory(boost-cmake) 45 | ``` 46 | 47 | ## Motivation 48 | 49 | Most people struggle building Boost for various platforms or using package managers to get the right version, so I figured I would open-source the solution similar to the one I developed while I worked at Spotify. 50 | 51 | Using this, as long as your main project is configured properly, Boost will be built with the same compiler, same architectures (in case of universal macOS or iOS build), same compilation flags (think of Clang sanitizers for example) without any hassle. 52 | 53 | The Boost sources will be automatically downloaded from CMake if they cannot be found. You can also fork the project and add the boost source package if you wish to do so, or use an alternative URL pointing for example to an HTTP cache internal to your network. 54 | 55 | ## Contributors 56 | 57 | Not all libraries are currently built. 
Patches accepted to build the remaining ones! 58 | 59 | ## License 60 | 61 | BSD 3-clause license. See LICENSE.md file. 62 | -------------------------------------------------------------------------------- /source/thirdparty/spdlog/sinks/dist_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2015 David Schury, Gabi Melman 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #include "spdlog/spdlog.h" 10 | #endif 11 | 12 | #include "base_sink.h" 13 | #include "spdlog/details/log_msg.h" 14 | #include "spdlog/details/null_mutex.h" 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | // Distribution sink (mux). Stores a vector of sinks which get called when log 22 | // is called 23 | 24 | namespace spdlog { 25 | namespace sinks { 26 | 27 | template 28 | class dist_sink : public base_sink 29 | { 30 | public: 31 | dist_sink() = default; 32 | dist_sink(const dist_sink &) = delete; 33 | dist_sink &operator=(const dist_sink &) = delete; 34 | 35 | void add_sink(std::shared_ptr sink) 36 | { 37 | std::lock_guard lock(base_sink::mutex_); 38 | sinks_.push_back(sink); 39 | } 40 | 41 | void remove_sink(std::shared_ptr sink) 42 | { 43 | std::lock_guard lock(base_sink::mutex_); 44 | sinks_.erase(std::remove(sinks_.begin(), sinks_.end(), sink), sinks_.end()); 45 | } 46 | 47 | void set_sinks(std::vector> sinks) 48 | { 49 | std::lock_guard lock(base_sink::mutex_); 50 | sinks_ = std::move(sinks); 51 | } 52 | 53 | protected: 54 | void sink_it_(const details::log_msg &msg) override 55 | { 56 | 57 | for (auto &sink : sinks_) 58 | { 59 | if (sink->should_log(msg.level)) 60 | { 61 | sink->log(msg); 62 | } 63 | } 64 | } 65 | 66 | void flush_() override 67 | { 68 | for (auto &sink : sinks_) 69 | { 70 | sink->flush(); 71 | } 72 | } 73 | 74 | void set_pattern_(const std::string &pattern) override 75 | { 76 | 
set_formatter_(details::make_unique(pattern)); 77 | } 78 | 79 | void set_formatter_(std::unique_ptr sink_formatter) override 80 | { 81 | base_sink::formatter_ = std::move(sink_formatter); 82 | for (auto &sink : sinks_) 83 | { 84 | sink->set_formatter(base_sink::formatter_->clone()); 85 | } 86 | } 87 | std::vector> sinks_; 88 | }; 89 | 90 | using dist_sink_mt = dist_sink; 91 | using dist_sink_st = dist_sink; 92 | 93 | } // namespace sinks 94 | } // namespace spdlog 95 | -------------------------------------------------------------------------------- /examples/outlier/example_dataset.multisample_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "Counts": { 3 | "AGC": { 4 | "IrrPairCounts": { 5 | "sample1": 5 6 | }, 7 | "RegionsWithIrrAnchors": { 8 | "StrA:1702-2159": { 9 | "sample1": 42, 10 | "sample10": 2, 11 | "sample11": 15, 12 | "sample13": 8, 13 | "sample14": 13, 14 | "sample16": 14, 15 | "sample17": 1, 16 | "sample18": 3, 17 | "sample19": 20, 18 | "sample2": 1, 19 | "sample20": 2, 20 | "sample21": 4, 21 | "sample4": 3, 22 | "sample5": 12, 23 | "sample6": 7, 24 | "sample7": 15, 25 | "sample8": 2, 26 | "sample9": 11 27 | } 28 | } 29 | } 30 | }, 31 | "Parameters": { 32 | "Depths": { 33 | "sample1": 39.61557663504743, 34 | "sample10": 37.89316025961058, 35 | "sample11": 38.04293559660509, 36 | "sample12": 37.74338492261607, 37 | "sample13": 38.04293559660509, 38 | "sample14": 38.04293559660509, 39 | "sample15": 37.74338492261607, 40 | "sample16": 38.1927109335996, 41 | "sample17": 37.89316025961058, 42 | "sample18": 38.04293559660509, 43 | "sample19": 38.34248627059411, 44 | "sample2": 37.89316025961058, 45 | "sample20": 37.89316025961058, 46 | "sample21": 38.04293559660509, 47 | "sample3": 37.74338492261607, 48 | "sample4": 37.89316025961058, 49 | "sample5": 38.04293559660509, 50 | "sample6": 38.1927109335996, 51 | "sample7": 38.1927109335996, 52 | "sample8": 37.89316025961058, 53 | "sample9": 38.1927109335996 
54 | }, 55 | "ReadLengths": { 56 | "sample1": 150, 57 | "sample10": 150, 58 | "sample11": 150, 59 | "sample12": 150, 60 | "sample13": 150, 61 | "sample14": 150, 62 | "sample15": 150, 63 | "sample16": 150, 64 | "sample17": 150, 65 | "sample18": 150, 66 | "sample19": 150, 67 | "sample2": 150, 68 | "sample20": 150, 69 | "sample21": 150, 70 | "sample3": 150, 71 | "sample4": 150, 72 | "sample5": 150, 73 | "sample6": 150, 74 | "sample7": 150, 75 | "sample8": 150, 76 | "sample9": 150 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /scripts/outlier/motifworkflow.py: -------------------------------------------------------------------------------- 1 | # 2 | # ExpansionHunter Denovo 3 | # Copyright 2016-2019 Illumina, Inc. 4 | # All rights reserved. 5 | # 6 | # Author: Egor Dolzhenko , 7 | # Michael Eberle 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 
# Paths consumed by run(); fields are accessed by attribute name.
Parameters = namedtuple(
    "Parameters", ["manifest_path", "multisample_profile_path", "output_path"]
)


def run(params):
    """Run the outlier motif workflow and write its results as a TSV file.

    Loads the multisample profile JSON named by
    ``params.multisample_profile_path``, builds the count table from its
    "Counts" section, passes the "Parameters" section to
    ``common.depth_normalize_counts`` (return value unused, so the table is
    presumably normalized in place), and derives sample status from the
    manifest at ``params.manifest_path``.

    For every row, ``common.run_zscore_analysis`` yields a top z-score and a
    mapping of cases with high counts; rows whose mapping is empty are not
    written. Output goes to ``params.output_path``.
    """
    with open(params.multisample_profile_path, "r") as profile_file:
        profile = json.load(profile_file)

    count_table = common.generate_table_with_irr_pair_counts(profile["Counts"])
    logging.info("Loaded %i regions", len(count_table))

    logging.info("Normalizing counts")
    common.depth_normalize_counts(profile["Parameters"], count_table)

    sample_status = common.extract_case_control_assignments(
        common.load_manifest(params.manifest_path)
    )

    header = "motif\ttop_case_zscore\thigh_case_counts\tcounts"
    with open(params.output_path, "w") as results_file:
        print(header, file=results_file)
        for row in count_table:
            sample_counts = row["sample_counts"]
            top_zscore, high_cases = common.run_zscore_analysis(
                sample_status, sample_counts
            )
            if not high_cases:
                # Nothing stands out for this motif, so it is left out.
                continue

            high_case_field = ",".join(
                "{}:{:.2f}".format(sample, count)
                for sample, count in high_cases.items()
            )
            all_counts_field = ",".join(
                "{:.2f}".format(count) for count in sample_counts.values()
            )

            fields = (
                row["unit"],
                "{:.2f}".format(top_zscore),
                high_case_field,
                all_counts_field,
            )
            print(*fields, sep="\t", file=results_file)
${BOOST_SOURCE}/libs/serialization/src/basic_text_wiprimitive.cpp 6 | ${BOOST_SOURCE}/libs/serialization/src/basic_text_woprimitive.cpp 7 | ${BOOST_SOURCE}/libs/serialization/src/text_wiarchive.cpp 8 | ${BOOST_SOURCE}/libs/serialization/src/text_woarchive.cpp 9 | ${BOOST_SOURCE}/libs/serialization/src/xml_wgrammar.cpp 10 | ${BOOST_SOURCE}/libs/serialization/src/xml_wiarchive.cpp 11 | ${BOOST_SOURCE}/libs/serialization/src/xml_woarchive.cpp 12 | ) 13 | set(CXXFLAGS "/Gy") 14 | endif() 15 | 16 | _add_boost_lib( 17 | NAME serialization 18 | SOURCES 19 | ${SERIALIZATION_WIN_SRCS} 20 | ${BOOST_SOURCE}/libs/serialization/src/archive_exception.cpp 21 | ${BOOST_SOURCE}/libs/serialization/src/basic_archive.cpp 22 | ${BOOST_SOURCE}/libs/serialization/src/basic_iarchive.cpp 23 | ${BOOST_SOURCE}/libs/serialization/src/basic_iserializer.cpp 24 | ${BOOST_SOURCE}/libs/serialization/src/basic_oarchive.cpp 25 | ${BOOST_SOURCE}/libs/serialization/src/basic_oserializer.cpp 26 | ${BOOST_SOURCE}/libs/serialization/src/basic_pointer_iserializer.cpp 27 | ${BOOST_SOURCE}/libs/serialization/src/basic_pointer_oserializer.cpp 28 | ${BOOST_SOURCE}/libs/serialization/src/basic_serializer_map.cpp 29 | ${BOOST_SOURCE}/libs/serialization/src/basic_text_iprimitive.cpp 30 | ${BOOST_SOURCE}/libs/serialization/src/basic_text_oprimitive.cpp 31 | ${BOOST_SOURCE}/libs/serialization/src/basic_xml_archive.cpp 32 | ${BOOST_SOURCE}/libs/serialization/src/basic_xml_grammar.ipp 33 | ${BOOST_SOURCE}/libs/serialization/src/binary_iarchive.cpp 34 | ${BOOST_SOURCE}/libs/serialization/src/binary_oarchive.cpp 35 | ${BOOST_SOURCE}/libs/serialization/src/binary_wiarchive.cpp 36 | ${BOOST_SOURCE}/libs/serialization/src/binary_woarchive.cpp 37 | ${BOOST_SOURCE}/libs/serialization/src/codecvt_null.cpp 38 | ${BOOST_SOURCE}/libs/serialization/src/extended_type_info.cpp 39 | ${BOOST_SOURCE}/libs/serialization/src/extended_type_info_no_rtti.cpp 40 | ${BOOST_SOURCE}/libs/serialization/src/extended_type_info_typeid.cpp 41 | 
${BOOST_SOURCE}/libs/serialization/src/polymorphic_iarchive.cpp 42 | ${BOOST_SOURCE}/libs/serialization/src/polymorphic_oarchive.cpp 43 | ${BOOST_SOURCE}/libs/serialization/src/stl_port.cpp 44 | ${BOOST_SOURCE}/libs/serialization/src/text_iarchive.cpp 45 | ${BOOST_SOURCE}/libs/serialization/src/text_oarchive.cpp 46 | ${BOOST_SOURCE}/libs/serialization/src/utf8_codecvt_facet.cpp 47 | ${BOOST_SOURCE}/libs/serialization/src/void_cast.cpp 48 | ${BOOST_SOURCE}/libs/serialization/src/xml_archive_exception.cpp 49 | ${BOOST_SOURCE}/libs/serialization/src/xml_grammar.cpp 50 | ${BOOST_SOURCE}/libs/serialization/src/xml_iarchive.cpp 51 | ${BOOST_SOURCE}/libs/serialization/src/xml_oarchive.cpp 52 | CXXFLAGS_PRIVATE 53 | $<$:/Gy> 54 | $<$>:-fvisibility=hidden -fvisibility-inlines-hidden -ftemplate-depth-255> 55 | ) 56 | -------------------------------------------------------------------------------- /source/io/Reference.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 
21 | 22 | #include "io/Reference.hh" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | using std::string; 29 | using std::to_string; 30 | using std::vector; 31 | 32 | Reference::Reference(const string& referencePath) 33 | : referencePath_(referencePath) 34 | , contigInfo_({}) 35 | { 36 | htsFastaIndexPtr_ = fai_load(referencePath_.c_str()); 37 | 38 | std::vector> internalNamesAndSizes; 39 | 40 | for (int contigIndex = 0; contigIndex != faidx_nseq(htsFastaIndexPtr_); ++contigIndex) 41 | { 42 | const char* sequenceName = faidx_iseq(htsFastaIndexPtr_, contigIndex); 43 | int64_t sequenceLength = faidx_seq_len(htsFastaIndexPtr_, sequenceName); 44 | internalNamesAndSizes.emplace_back(sequenceName, sequenceLength); 45 | } 46 | 47 | contigInfo_ = ReferenceContigInfo(internalNamesAndSizes); 48 | } 49 | 50 | Reference::~Reference() { fai_destroy(htsFastaIndexPtr_); } 51 | 52 | string Reference::getSequence(const string& contigName, int64_t start, int64_t end) const 53 | { 54 | const int contigIndex = contigInfo_.getContigId(contigName); 55 | const char* contigNamePtr = faidx_iseq(htsFastaIndexPtr_, contigIndex); 56 | 57 | int extractedLength; 58 | // This htslib function is 0-based closed but our coordinates are half open 59 | char* sequencePtr = faidx_fetch_seq(htsFastaIndexPtr_, contigNamePtr, start, end - 1, &extractedLength); 60 | 61 | if (!sequencePtr || extractedLength < 0 || extractedLength < end - start) 62 | { 63 | const string encoding(contigName + ":" + to_string(start) + "-" + to_string(end)); 64 | const string message = "Unable to extract " + encoding + " from " + referencePath_; 65 | throw std::runtime_error(message); 66 | } 67 | 68 | string sequence("N", extractedLength); 69 | std::transform(sequencePtr, sequencePtr + extractedLength, sequence.begin(), ::toupper); 70 | free(sequencePtr); 71 | 72 | return sequence; 73 | } 74 | 75 | string Reference::getSequence(const GenomicRegion& region) const 76 | { 77 | return 
getSequence(contigInfo_.getContigName(region.contigId()), region.start(), region.end()); 78 | } 79 | -------------------------------------------------------------------------------- /scripts/casecontrol/motifworkflow.py: -------------------------------------------------------------------------------- 1 | # 2 | # ExpansionHunter Denovo 3 | # Copyright 2016-2019 Illumina, Inc. 4 | # All rights reserved. 5 | # 6 | # Author: Egor Dolzhenko , 7 | # Michael Eberle 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 
# Settings consumed by run(); fields are accessed by attribute name.
Parameters = namedtuple(
    "Parameters",
    [
        "manifest_path",
        "multisample_profile_path",
        "min_inrepeat_read_pairs",
        "output_path",
        "test_params",
    ],
)


def output_results(count_table, output_path):
    """Write *count_table* to *output_path* as tab-separated text.

    The first line is the column header. Each row then contributes one line
    holding its "unit", "pvalue" and "bonf_pvalue" values followed by the
    "sample_counts" mapping encoded as comma-joined ``sample:count`` pairs.
    """
    column_names = ["motif", "pvalue", "bonf_pvalue", "counts"]
    with open(output_path, "w") as output_file:
        print("\t".join(column_names), file=output_file)
        for row in count_table:
            leading_fields = [row["unit"], row["pvalue"], row["bonf_pvalue"]]
            counts_field = ",".join(
                "{}:{}".format(sample, count)
                for sample, count in row["sample_counts"].items()
            )
            print(*leading_fields, counts_field, sep="\t", file=output_file)


def run(params):
    """Run the case-control motif workflow end to end.

    Loads the multisample profile JSON, builds the count table from its
    "Counts" section, then hands the table to the ``common`` helpers for depth
    normalization, magnitude filtering, case/control comparison and p-value
    correction before writing it with ``output_results``. The helpers whose
    return value is ignored presumably update the table in place.
    """
    with open(params.multisample_profile_path, "r") as profile_file:
        profile = json.load(profile_file)

    table = common.generate_table_with_irr_pair_counts(profile["Counts"])
    logging.info("Loaded %i regions", len(table))

    logging.info("Normalizing counts")
    common.depth_normalize_counts(profile["Parameters"], table)

    logging.info("Filtering counts")
    table = common.filter_counts_by_magnitude(table, params.min_inrepeat_read_pairs)
    logging.info("%i regions left after filtering", len(table))

    sample_status = common.extract_case_control_assignments(
        common.load_manifest(params.manifest_path)
    )

    logging.info("Comparing counts")
    common.compare_counts(params.test_params, sample_status, table)

    logging.info("Correcting p-values")
    common.correct_pvalues(table)

    output_results(table, params.output_path)
    logging.info("Done")
args) { 47 | return internal::vformat( 48 | loc, to_string_view(format_str), 49 | *internal::checked_args(format_str, args...)); 50 | } 51 | 52 | template 53 | inline typename std::enable_if::value, 54 | OutputIt>::type 55 | vformat_to(OutputIt out, const std::locale &loc, const String &format_str, 56 | typename format_args_t::type args) { 57 | typedef output_range range; 58 | return vformat_to>( 59 | range(out), to_string_view(format_str), args, internal::locale_ref(loc)); 60 | } 61 | 62 | template 63 | inline typename std::enable_if< 64 | internal::is_string::value && 65 | internal::is_output_iterator::value, OutputIt>::type 66 | format_to(OutputIt out, const std::locale &loc, const S &format_str, 67 | const Args &... args) { 68 | internal::check_format_string(format_str); 69 | typedef typename format_context_t::type context; 70 | format_arg_store as{args...}; 71 | return vformat_to(out, loc, to_string_view(format_str), 72 | basic_format_args(as)); 73 | } 74 | 75 | FMT_END_NAMESPACE 76 | 77 | #endif // FMT_LOCALE_H_ 78 | -------------------------------------------------------------------------------- /source/common/Parameters.hh: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 
// Immutable bundle of the three path strings the program is configured with:
// the reads file, the reference, and the prefix used for output files.
class PathParameters
{
public:
    // Takes the strings by value and moves them into the members.
    PathParameters(std::string reads, std::string reference, std::string outputPrefix)
        : reads_(std::move(reads))
        , reference_(std::move(reference))
        , outputPrefix_(std::move(outputPrefix))
    {
    }

    const std::string& reads() const { return reads_; }
    const std::string& reference() const { return reference_; }
    const std::string& outputPrefix() const { return outputPrefix_; }

private:
    std::string reads_;
    std::string reference_;
    std::string outputPrefix_;
};

// Immutable bundle of the four integer settings named by its accessors. The
// values are stored exactly as given; no validation happens here.
class HeuristicParameters
{
public:
    HeuristicParameters(
        int shortestUnitToConsider, int longestUnitToConsider, int minMapqOfAnchorRead, int maxMapqOfInrepeatRead)
        : shortestUnitToConsider_(shortestUnitToConsider)
        , longestUnitToConsider_(longestUnitToConsider)
        , minMapqOfAnchorRead_(minMapqOfAnchorRead)
        , maxMapqOfInrepeatRead_(maxMapqOfInrepeatRead)
    {
    }

    int shortestUnitToConsider() const { return shortestUnitToConsider_; }
    int longestUnitToConsider() const { return longestUnitToConsider_; }
    int minMapqOfAnchorRead() const { return minMapqOfAnchorRead_; }
    int maxMapqOfInrepeatRead() const { return maxMapqOfInrepeatRead_; }

private:
    int shortestUnitToConsider_;
    int longestUnitToConsider_;
    int minMapqOfAnchorRead_;
    int maxMapqOfInrepeatRead_;
};

// Top-level configuration: the paths, the read length, and the heuristic
// settings, each stored as its own copy.
class ProgramParameters
{
public:
    ProgramParameters(PathParameters paths, int readLength, HeuristicParameters heuristics)
        : paths_(std::move(paths))
        , readLength_(readLength)
        , heuristics_(heuristics)
    {
    }

    const PathParameters& paths() const { return paths_; }
    int readLength() const { return readLength_; }
    const HeuristicParameters& heuristics() const { return heuristics_; }

private:
    PathParameters paths_;
    int readLength_;
    HeuristicParameters heuristics_;
};
In this situation, an outlier analysis 26 | can be used to flag repeats that are expanded in a small proportion of cases 27 | compared to the rest of the dataset. 28 | 29 | ## Features 30 | 31 | - Approximate location and nucleotide composition of STRs are inferred 32 | automatically. 33 | - A single BAM/CRAM file can be analyzed in about 30 mins to 5 hours on a 34 | typical workstation. The exact runtime will depend on the sensitivity settings. 35 | 36 | ## Limitations 37 | 38 | - STRs shorter than the read length are ignored; the program is appropriate 39 | only for detecting expansions that exceed the read length. 40 | - The location of each reported STR is approximate (up to about 500bp-1Kbp) 41 | - STRs are not genotyped; the program reports a depth-normalized count of reads 42 | originating inside each STR; this count can be used as a very approximate 43 | measure of the repeat length 44 | - To achieve best results all samples must be sequenced on the same instrument 45 | to similar coverage, have the same read and fragment lengths, and be subjected 46 | to the same computational pre-processing (e.g. reads must be aligned by the 47 | same aligner) 48 | 49 | ## Documentation 50 | 51 | See [documentation](documentation/00_Introduction.md) for installation 52 | instructions, usage guide, and description of file formats. 53 | 54 | ## License 55 | 56 | ExpansionHunter Denovo is provided under the terms and conditions of the [PolyForm Strict License 1.0.0](LICENSE.txt). It relies on several third party packages 57 | provided under other open-source licenses, please see [COPYRIGHT.txt](COPYRIGHT.txt) 58 | for additional details. 59 | -------------------------------------------------------------------------------- /source/io/HtsHelpers.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ExpansionHunter Denovo 3 | // Copyright 2016-2019 Illumina, Inc. 4 | // All rights reserved. 
5 | // 6 | // Author: Egor Dolzhenko , 7 | // Michael Eberle 8 | // 9 | // Licensed under the PolyForm Strict License 1.0.0 10 | // you may not use this file except in compliance with the License. 11 | // You may obtain a copy of the License at 12 | // 13 | // https://polyformproject.org/licenses/strict/1.0.0 14 | // 15 | // As far as the law allows, the software comes as is, without 16 | // any warranty or condition, and the licensor will not be liable 17 | // to you for any damages arising out of these terms or the use 18 | // or nature of the software, under any kind of legal claim. 19 | // See the License for the specific language governing permissions and 20 | // limitations under the License. 21 | 22 | #include "io/HtsHelpers.hh" 23 | 24 | #include 25 | 26 | using std::pair; 27 | using std::string; 28 | using std::vector; 29 | 30 | string decodeQuals(bam1_t* htsAlignPtr) 31 | { 32 | string quals; 33 | uint8_t* htsQualsPtr = bam_get_qual(htsAlignPtr); 34 | const int readLength = htsAlignPtr->core.l_qseq; 35 | quals.resize(readLength); 36 | 37 | for (int index = 0; index < readLength; ++index) 38 | { 39 | quals[index] = static_cast(33 + htsQualsPtr[index]); 40 | } 41 | 42 | return quals; 43 | } 44 | 45 | string decodeBases(bam1_t* htsAlignPtr) 46 | { 47 | string bases; 48 | uint8_t* htsSeqPtr = bam_get_seq(htsAlignPtr); 49 | const int32_t readLength = htsAlignPtr->core.l_qseq; 50 | bases.resize(readLength); 51 | 52 | for (int32_t index = 0; index < readLength; ++index) 53 | { 54 | bases[index] = seq_nt16_str[bam_seqi(htsSeqPtr, index)]; 55 | } 56 | 57 | return bases; 58 | } 59 | 60 | Read decodeHtsRead(bam1_t* htsAlignPtr) 61 | { 62 | Read read; 63 | 64 | read.bases = decodeBases(htsAlignPtr); 65 | read.flag = htsAlignPtr->core.flag; 66 | read.mapq = htsAlignPtr->core.qual; 67 | read.name = bam_get_qname(htsAlignPtr); 68 | read.pos = htsAlignPtr->core.pos; 69 | read.quals = decodeQuals(htsAlignPtr); 70 | read.contigId = htsAlignPtr->core.tid; 71 | 
read.mateContigId = htsAlignPtr->core.mtid; 72 | read.matePos = htsAlignPtr->core.mpos; 73 | 74 | return read; 75 | } 76 | 77 | bool isPrimaryAlignment(bam1_t* htsAlignPtr) 78 | { 79 | return !((htsAlignPtr->core.flag & BAM_FSECONDARY) || (htsAlignPtr->core.flag & BAM_FSUPPLEMENTARY)); 80 | } 81 | 82 | ReferenceContigInfo decodeContigInfo(bam_hdr_t* htsHeaderPtr) 83 | { 84 | vector> contigNamesAndSizes; 85 | const int32_t numContigs = htsHeaderPtr->n_targets; 86 | contigNamesAndSizes.reserve(numContigs); 87 | 88 | for (int32_t contigIndex = 0; contigIndex != numContigs; ++contigIndex) 89 | { 90 | const string contig = htsHeaderPtr->target_name[contigIndex]; 91 | int64_t size = htsHeaderPtr->target_len[contigIndex]; 92 | contigNamesAndSizes.push_back(std::make_pair(contig, size)); 93 | } 94 | 95 | return ReferenceContigInfo(contigNamesAndSizes); 96 | } 97 | -------------------------------------------------------------------------------- /source/thirdparty/boost-cmake/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.12) 2 | project(Boost-CMake) 3 | 4 | option(BOOST_DISABLE_TESTS "Do not build test targets, even if building standalone" OFF) 5 | 6 | set(BOOST_URL "https://boostorg.jfrog.io/artifactory/main/release/1.71.0/source/boost_1_71_0.tar.bz2" CACHE STRING "Boost download URL") 7 | set(BOOST_URL_SHA256 "d73a8da01e8bf8c7eda40b4c84915071a8c8a0df4a6734537ddde4a8580524ee" CACHE STRING "Boost download URL SHA256 checksum") 8 | 9 | include(FetchContent) 10 | FetchContent_Declare( 11 | Boost 12 | URL ${BOOST_URL} 13 | URL_HASH SHA256=${BOOST_URL_SHA256} 14 | ) 15 | FetchContent_GetProperties(Boost) 16 | 17 | if(NOT Boost_POPULATED) 18 | message(STATUS "Fetching Boost") 19 | FetchContent_Populate(Boost) 20 | message(STATUS "Fetching Boost - done") 21 | set(BOOST_SOURCE ${boost_SOURCE_DIR}) 22 | endif() 23 | 24 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) 25 
| include(CheckBoostVersion) 26 | 27 | message(STATUS "Boost found: ${BOOST_VERSION} ${BOOST_SOURCE}") 28 | 29 | include(StandaloneBuild) 30 | include(PlatformDetect) 31 | include(AddBoostLib) 32 | include(AddBoostTest) 33 | 34 | set_property(GLOBAL PROPERTY USE_FOLDERS TRUE) 35 | 36 | if(USE_ANDROID) 37 | # CMake 3.7.1 doesn't define the target triple for the ASM language, 38 | # resulting in all files compiled for the host architecture 39 | set(CMAKE_ASM_COMPILER_TARGET "${CMAKE_CXX_COMPILER_TARGET}") 40 | endif() 41 | 42 | set(BOOST_LIBS_REQUIRED 43 | # Header only libs 44 | header 45 | ) 46 | set(BOOST_LIBS_OPTIONAL 47 | # Compiled libs 48 | atomic 49 | chrono 50 | container 51 | context 52 | coroutine 53 | date_time 54 | exception 55 | fiber 56 | filesystem 57 | graph 58 | iostreams 59 | locale 60 | log 61 | math 62 | mpi 63 | graph_parallel # depends on mpi, so needs to be put after it 64 | program_options 65 | #python # complex module 66 | random 67 | regex 68 | serialization 69 | system 70 | test 71 | thread 72 | timer 73 | type_erasure 74 | wave 75 | CACHE STRING "Boost libs to be compiled" 76 | ) 77 | 78 | foreach(lib ${BOOST_LIBS_REQUIRED}) 79 | include("libs/${lib}.cmake") 80 | endforeach() 81 | 82 | foreach(lib ${BOOST_LIBS_OPTIONAL}) 83 | # In case only a subset of modules is available (eg. 
after using bcp) 84 | if(EXISTS "${BOOST_SOURCE}/libs/${lib}") 85 | include("libs/${lib}.cmake") 86 | endif() 87 | endforeach() 88 | 89 | # TODO: Move those to option() calls in the right file 90 | if(NOT BOOST_STANDALONE) 91 | # Compilation options required by all platforms 92 | target_compile_definitions(Boost::boost INTERFACE 93 | $<$<CONFIG:Release>:BOOST_DISABLE_ASSERT> 94 | BOOST_SYSTEM_NO_DEPRECATED 95 | BOOST_THREAD_VERSION=4 96 | BOOST_THREAD_USES_CHRONO 97 | BOOST_THREAD_PROVIDES_EXECUTORS 98 | ) 99 | endif() 100 | 101 | if(USE_ANDROID) 102 | # Android doesn't support thread local storage through compiler intrinsics 103 | target_compile_definitions(Boost::boost INTERFACE BOOST_ASIO_DISABLE_THREAD_KEYWORD_EXTENSION) 104 | endif() 105 | --------------------------------------------------------------------------------