├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── RELEASE ├── VERSION ├── cpp ├── .clang-format ├── BUILD ├── CMakeLists.txt ├── Doxyfile ├── WORKSPACE ├── src │ ├── alignment │ │ ├── align.cpp │ │ ├── align.hpp │ │ ├── alignScorer.hpp │ │ ├── aligner.cpp │ │ ├── aligner.hpp │ │ ├── cigar.cpp │ │ ├── cigar.hpp │ │ ├── cigarItems.cpp │ │ ├── cigarItems.hpp │ │ ├── galign.cpp │ │ ├── galign.hpp │ │ └── mmHelpers.hpp │ ├── assembly │ │ ├── node.cpp │ │ ├── node.hpp │ │ ├── sequenceGraph.cpp │ │ └── sequenceGraph.hpp │ ├── caller │ │ ├── alignPhasing.cpp │ │ ├── alignPhasing.hpp │ │ ├── annotation.cpp │ │ ├── annotation.hpp │ │ ├── callSet.cpp │ │ ├── callSet.hpp │ │ ├── candidateVariantBank.cpp │ │ ├── candidateVariantBank.hpp │ │ ├── diploid │ │ │ ├── diploid.cpp │ │ │ ├── diploid.hpp │ │ │ ├── diploidAnnotate.cpp │ │ │ ├── diploidAnnotate.hpp │ │ │ ├── genotypeUtils.cpp │ │ │ ├── genotypeUtils.hpp │ │ │ ├── readSupportAccountant.cpp │ │ │ ├── readSupportAccountant.hpp │ │ │ ├── referenceCalling.cpp │ │ │ ├── referenceCalling.hpp │ │ │ ├── variantQualityCalculator.cpp │ │ │ └── variantQualityCalculator.hpp │ │ ├── haplotypeLikelihoods.cpp │ │ ├── haplotypeLikelihoods.hpp │ │ ├── job.cpp │ │ ├── job.hpp │ │ ├── jobReduce.cpp │ │ ├── jobReduce.hpp │ │ ├── metadata.hpp │ │ ├── params.cpp │ │ ├── params.hpp │ │ ├── region.cpp │ │ ├── region.hpp │ │ ├── regionUtils.cpp │ │ ├── regionUtils.hpp │ │ └── typedAnnotation.hpp │ ├── common.hpp │ ├── io │ │ ├── bamFile.cpp │ │ ├── bamFile.hpp │ │ ├── bamFileIterator.cpp │ │ ├── bamFileIterator.hpp │ │ ├── bedFile.cpp │ │ ├── bedFile.hpp │ │ ├── fastaFile.cpp │ │ ├── fastaFile.hpp │ │ ├── pysam.cpp │ │ ├── pysam.hpp │ │ ├── read.cpp │ │ ├── read.hpp │ │ ├── readDataReader.cpp │ │ ├── readDataReader.hpp │ │ ├── readDataSet.cpp │ │ ├── readDataSet.hpp │ │ ├── readIntervalTree.hpp │ │ ├── readRange.cpp │ │ ├── readRange.hpp │ │ ├── readSummaries.cpp │ │ ├── readSummaries.hpp │ │ ├── readUtils.cpp │ │ ├── 
readUtils.hpp │ │ ├── readfilters │ │ │ ├── baseQualityFilter.cpp │ │ │ ├── baseQualityFilter.hpp │ │ │ ├── booleanFilter.cpp │ │ │ ├── booleanFilter.hpp │ │ │ ├── mapQualityFilter.cpp │ │ │ ├── mapQualityFilter.hpp │ │ │ ├── rangeFilter.hpp │ │ │ ├── readFilter.cpp │ │ │ ├── readFilter.hpp │ │ │ ├── readFilterAndTrimmer.cpp │ │ │ ├── readFilterAndTrimmer.hpp │ │ │ ├── shortReadFilter.cpp │ │ │ └── shortReadFilter.hpp │ │ ├── tabixFile.cpp │ │ ├── tabixFile.hpp │ │ ├── tabixVCFFile.cpp │ │ ├── tabixVCFFile.hpp │ │ ├── vcfWriter.cpp │ │ └── vcfWriter.hpp │ ├── mainpage.dox │ ├── mapping │ │ ├── hashMapper.cpp │ │ └── hashMapper.hpp │ ├── readrecalibration │ │ ├── commonTypes.hpp │ │ ├── errorCorrectionParameters.hpp │ │ ├── intermediateOutputWriter.cpp │ │ ├── intermediateOutputWriter.hpp │ │ ├── kmerDistribution.cpp │ │ ├── kmerDistribution.hpp │ │ ├── readDataForErrorPosterior.cpp │ │ ├── readDataForErrorPosterior.hpp │ │ ├── readRecalibration.cpp │ │ ├── readRecalibration.hpp │ │ ├── siteKmerDistribution.cpp │ │ ├── siteKmerDistribution.hpp │ │ ├── siteReadDataForErrorPosterior.cpp │ │ └── siteReadDataForErrorPosterior.hpp │ ├── stats │ │ ├── functions.cpp │ │ ├── functions.hpp │ │ ├── models.cpp │ │ └── models.hpp │ ├── utils │ │ ├── NeedlemanWunsch.cpp │ │ ├── NeedlemanWunsch.hpp │ │ ├── bestScoreSelector.cpp │ │ ├── bestScoreSelector.hpp │ │ ├── combinationGenerator.hpp │ │ ├── combinations.hpp │ │ ├── date.hpp │ │ ├── exceptions.hpp │ │ ├── flatten.hpp │ │ ├── identity.hpp │ │ ├── indexedProduct.hpp │ │ ├── interval.cpp │ │ ├── interval.hpp │ │ ├── intervalTree.hpp │ │ ├── logging.cpp │ │ ├── logging.hpp │ │ ├── matrix.cpp │ │ ├── matrix.hpp │ │ ├── median.hpp │ │ ├── multinomialCoefficients.cpp │ │ ├── multinomialCoefficients.hpp │ │ ├── partition.hpp │ │ ├── referenceSequence.cpp │ │ ├── referenceSequence.hpp │ │ ├── sequence.cpp │ │ ├── sequence.hpp │ │ ├── timer.cpp │ │ ├── timer.hpp │ │ └── write.hpp │ ├── varfilters │ │ ├── filter.cpp │ │ ├── filter.hpp 
│ │ ├── variantSoftFilterBank.cpp │ │ └── variantSoftFilterBank.hpp │ ├── variant │ │ ├── breakpointVariantGenerator.cpp │ │ ├── breakpointVariantGenerator.hpp │ │ ├── clustering.cpp │ │ ├── clustering.hpp │ │ ├── genotype.cpp │ │ ├── genotype.hpp │ │ ├── haplotype.cpp │ │ ├── haplotype.hpp │ │ ├── haplotypeGenerator.cpp │ │ ├── haplotypeGenerator.hpp │ │ ├── haplotypeRanker.cpp │ │ ├── haplotypeRanker.hpp │ │ ├── snpFinder.cpp │ │ ├── snpFinder.hpp │ │ ├── type │ │ │ ├── breakpoint.cpp │ │ │ ├── breakpoint.hpp │ │ │ ├── variant.cpp │ │ │ └── variant.hpp │ │ ├── variantCombinations.cpp │ │ ├── variantCombinations.hpp │ │ ├── variantContainer.cpp │ │ ├── variantContainer.hpp │ │ ├── variantFilter.cpp │ │ ├── variantFilter.hpp │ │ ├── variantGenerationData.cpp │ │ ├── variantGenerationData.hpp │ │ ├── variantGenerator.cpp │ │ ├── variantGenerator.hpp │ │ ├── variantNormalizer.cpp │ │ └── variantNormalizer.hpp │ ├── vcf │ │ ├── field.cpp │ │ ├── field.hpp │ │ ├── filterDescription.cpp │ │ ├── filterDescription.hpp │ │ ├── header.cpp │ │ ├── header.hpp │ │ ├── reader.cpp │ │ ├── reader.hpp │ │ ├── record.cpp │ │ └── record.hpp │ ├── version │ │ ├── version.cpp.template │ │ └── version.hpp │ ├── weCall.cpp │ ├── weCallBase.hpp │ ├── weCallMapAndReduce.cpp │ ├── weCallMapAndReduce.hpp │ ├── weCallReduce.cpp │ └── weCallReduce.hpp └── test │ ├── ioTest │ ├── caller │ │ └── testRegionUtils.cpp │ ├── io │ │ ├── ioFixture.hpp │ │ ├── testBedFile.cpp │ │ ├── testBuildRefCall.cpp │ │ ├── testFastaFile.cpp │ │ ├── testRead.cpp │ │ ├── testReadDataset.cpp │ │ ├── testReadIntervalTree.cpp │ │ ├── testReadRange.cpp │ │ ├── testReadSummaries.cpp │ │ ├── testReadUtils.cpp │ │ └── testVCFWriter.cpp │ ├── ioTest.cpp │ ├── readrecalibration │ │ └── testKmerDistrubution.cpp │ └── utils │ │ ├── environment.cpp │ │ └── environment.hpp │ └── unittest │ ├── alignment │ ├── testCigar.cpp │ ├── testCigarItems.cpp │ ├── testGAlign.cpp │ └── testMMHelpers.cpp │ ├── assembly │ ├── testNode.cpp │ 
└── testSequenceGraph.cpp │ ├── caller │ ├── testAlignPhasing.cpp │ ├── testCandidateVariantBank.cpp │ ├── testParams.cpp │ ├── testRegion.cpp │ └── testRegionUtils.cpp │ ├── mapping │ └── testHashMapper.cpp │ ├── readFilters │ └── testReadFilterAndTrimmer.cpp │ ├── readrecalibration │ ├── testCommonTypes.cpp │ ├── testReadRecalibration.cpp │ └── testSiteKmerDistribution.cpp │ ├── stats │ └── testFunctions.cpp │ ├── unittest.cpp │ ├── utils │ ├── testBestScoreSelector.cpp │ ├── testFactorial.cpp │ ├── testFlatten.cpp │ ├── testInterval.cpp │ ├── testMatrix.cpp │ ├── testMedian.cpp │ ├── testMultinomialCoefficients.cpp │ ├── testNWPenalties.cpp │ ├── testNWVariant.cpp │ ├── testNeedlemanWunsch.cpp │ ├── testPartition.cpp │ ├── testReferenceSequence.cpp │ ├── testSequence.cpp │ └── testWrite.cpp │ ├── varfilters │ └── testVariantSoftFilterBank.cpp │ ├── variant │ ├── testBreakpoint.cpp │ ├── testBreakpointVariantGenerator.cpp │ ├── testClustering.cpp │ ├── testDeletion.cpp │ ├── testGenotype.cpp │ ├── testGenotypeVector.cpp │ ├── testGenotypeVectorForPloidy1.cpp │ ├── testGenotypeVectorForPloidy2.cpp │ ├── testGenotypeVectorForPloidy3.cpp │ ├── testHaplotype.cpp │ ├── testHaplotypeGeneration.cpp │ ├── testHaplotypeVector.cpp │ ├── testInsertion.cpp │ ├── testMnp.cpp │ ├── testSnp.cpp │ ├── testSnpFinder.cpp │ ├── testVariant.cpp │ ├── testVariantCombinations.cpp │ ├── testVariantContainer.cpp │ ├── testVariantFilter.cpp │ ├── testVariantGenerator.cpp │ └── testVariantNormalizer.cpp │ └── vcf │ ├── VCFTestUtils.cpp │ ├── VCFTestUtils.hpp │ ├── testField.cpp │ ├── testReader.cpp │ └── testRecord.cpp ├── doc └── weCall-userguide.tex ├── python ├── __init__.py ├── setup.py └── wecall │ ├── __init__.py │ ├── bamutils │ ├── __init__.py │ ├── bam_builder.py │ ├── bam_region_iterator.py │ ├── cigar.py │ ├── raw_string_sequence.py │ ├── read_sequence.py │ ├── sample_bank.py │ ├── sequence.py │ ├── sequence_bank.py │ ├── sequence_builder.py │ ├── sequence_position.py │ └── 
sequence_quality.py │ ├── bedutils │ ├── __init__.py │ ├── bedrecord.py │ └── bedwriter.py │ ├── common │ ├── __init__.py │ └── exceptions.py │ ├── fastautils │ ├── __init__.py │ └── fasta_file_builder.py │ ├── genomics │ ├── __init__.py │ ├── chromosome.py │ ├── reference_chromosome.py │ ├── reference_genome.py │ └── variant.py │ ├── utils │ ├── __init__.py │ ├── interval.py │ ├── tabix_indexer.py │ └── tabix_wrapper.py │ ├── vcfutils │ ├── README.md │ ├── __init__.py │ ├── fieldmetadata.py │ ├── genotype_call.py │ ├── info_data.py │ ├── parser.py │ ├── record.py │ ├── sample_data.py │ ├── schema.py │ ├── stringutils.py │ ├── vcf_builder.py │ └── writer.py │ └── wecall_utils │ ├── __init__.py │ ├── log_utils.py │ ├── wecall_config_builder.py │ ├── wecall_input_data.py │ └── wecall_input_data_builder.py ├── scripts ├── clang-reformat.sh ├── get-property.py ├── help_to_latex.py ├── make-docs.sh ├── renderTemplate.py ├── run-tests.sh └── static-checks.sh ├── test-drivers ├── __init__.py ├── setup.py └── wecall_test_drivers │ ├── __init__.py │ ├── ascii_quality_recalibration_runner.py │ ├── ascii_wecall_runner.py │ ├── base_test.py │ ├── svc_driver.py │ ├── timed_command.py │ ├── tool_runner.py │ ├── variant_caller_builder.py │ ├── variant_caller_wrapper.py │ ├── variant_callset.py │ ├── vcf_expectation.py │ ├── wecall_config_file_test_runnner.py │ └── wecall_schema.py ├── test ├── __init__.py ├── test_style │ └── test_wecall_pep8.py ├── test_utils │ ├── __init__.py │ ├── bamutils │ │ ├── __init__.py │ │ ├── test_bam_builder.py │ │ ├── test_bam_file_iterator.py │ │ ├── test_cigar.py │ │ ├── test_sample_bank.py │ │ ├── test_sequence.py │ │ ├── test_sequence_bank.py │ │ ├── test_sequence_builder.py │ │ ├── test_sequence_position.py │ │ └── test_sequence_quality.py │ ├── bedutils │ │ ├── __init__.py │ │ └── test_bedwriter.py │ ├── fastautils │ │ ├── __init__.py │ │ └── test_fasta_file_builder.py │ ├── genomics │ │ ├── __init__.py │ │ ├── test_reference_chromosome.py │ │ 
├── test_reference_genome.py │ │ └── test_variant.py │ ├── utils │ │ ├── __init__.py │ │ ├── test_interval.py │ │ └── test_tabix_wrapper.py │ ├── vcfutils │ │ ├── __init__.py │ │ ├── example_data │ │ │ └── vcf_example.vcf │ │ ├── test_chromosome.py │ │ ├── test_fieldmetadata.py │ │ ├── test_genotype_call.py │ │ ├── test_info_data.py │ │ ├── test_parser.py │ │ ├── test_record.py │ │ ├── test_sample_data.py │ │ ├── test_schema.py │ │ └── test_writer.py │ └── wecall_utils │ │ ├── __init__.py │ │ └── test_wecall_input_data_builder.py └── wecall_acceptance │ ├── __init__.py │ ├── call_filters │ ├── __init__.py │ ├── test_bad_reads_filter.py │ ├── test_combined_allele_strand_bias.py │ ├── test_min_root_mean_square_mapping_quality_filter.py │ ├── test_quality_filter.py │ ├── test_quality_over_depth_filter.py │ ├── test_strand_bias.py │ └── test_var_filter_ids.py │ ├── calling_using_skipped_sequence │ ├── __init__.py │ └── test_calling_with_skipped_sequence_basic.py │ ├── genotyping │ ├── __init__.py │ ├── test_genotyping_in_clean_data.py │ ├── test_input_file.py │ └── test_single_sample.py │ ├── malformed_inputs │ ├── __init__.py │ ├── test_calling_with_non_standard_bases_in_data.py │ ├── test_malformed_BAM.py │ └── test_non_canonical_bases.py │ ├── multi_sample_diploid │ ├── __init__.py │ └── test_trio.py │ ├── output_representations │ ├── __init__.py │ ├── test_alignment.py │ ├── test_indels_next_to_snps.py │ └── test_normalized_variant_calls.py │ ├── phased_genotypes │ ├── __init__.py │ ├── test_align_phasing_of_clusters.py │ ├── test_multi_sample_diploid.py │ └── test_single_sample_diploid.py │ ├── ploidy │ ├── __init__.py │ ├── test_ploidy_1.py │ └── test_ploidy_3.py │ ├── read_filters │ ├── __init__.py │ ├── no_similar_reads.py │ ├── test_no_duplicates.py │ └── test_non_proper_pair.py │ ├── reference_calling │ ├── __init__.py │ ├── test_annotations.py │ ├── test_basic_calling.py │ ├── test_cli.py │ ├── test_multi_sample_ref_calling.py │ └── test_quality.py │ ├── 
regions_specification │ ├── __init__.py │ ├── test_bed_file_format.py │ ├── test_block_boundaries.py │ ├── test_calls_inside_bed_file_regions.py │ ├── test_region_padding.py │ └── test_small_regions.py │ ├── single_sample_diploid │ ├── __init__.py │ ├── test_allele_bias.py │ ├── test_calling_in_clean_data.py │ ├── test_calling_in_data_with_read_errors.py │ ├── test_calling_in_data_with_repetitive_sequence.py │ ├── test_calls_mnps.py │ ├── test_candidate_variant_specification.py │ ├── test_indels_at_edge_of_reads.py │ ├── test_info_annotation.py │ ├── test_output_all_variants.py │ ├── test_quality_recalibration.py │ ├── test_read_counts.py │ ├── test_strand_bias.py │ ├── test_variant_clustering.py │ └── test_with_genotypes.py │ ├── somatic_variant_calls │ ├── __init__.py │ └── test_calling_variant_with_low_percentage_support.py │ └── wecall_runner │ ├── __init__.py │ ├── config │ └── basic_calling_test.config │ ├── test_cmd_line_options.py │ ├── test_from_config_file.py │ ├── test_variant_caller_timings.py │ ├── test_vcf_schema.py │ ├── test_weCall_parallelisation.py │ └── test_wecall_reduce.py └── vendor ├── Makefile ├── README ├── samtools ├── .gitignore ├── AUTHORS ├── COPYING ├── ChangeLog.old ├── INSTALL ├── Makefile ├── Makefile.mingw ├── NEWS ├── bam.c ├── bam.h ├── bam2bcf.c ├── bam2bcf.h ├── bam2bcf_indel.c ├── bam2depth.c ├── bam_aux.c ├── bam_cat.c ├── bam_color.c ├── bam_endian.h ├── bam_import.c ├── bam_index.c ├── bam_lpileup.c ├── bam_mate.c ├── bam_md.c ├── bam_pileup.c ├── bam_plcmd.c ├── bam_reheader.c ├── bam_rmdup.c ├── bam_rmdupse.c ├── bam_sort.c ├── bam_stat.c ├── bam_tview.c ├── bam_tview.h ├── bam_tview_curses.c ├── bam_tview_html.c ├── bamshuf.c ├── bamtk.c ├── bcftools │ ├── Makefile │ ├── README │ ├── bcf.c │ ├── bcf.h │ ├── bcf.tex │ ├── bcf2qcall.c │ ├── bcfutils.c │ ├── call1.c │ ├── em.c │ ├── fet.c │ ├── index.c │ ├── kfunc.c │ ├── kmin.c │ ├── kmin.h │ ├── main.c │ ├── mut.c │ ├── prob1.c │ ├── prob1.h │ ├── vcf.c │ └── vcfutils.pl 
├── bedcov.c ├── bedidx.c ├── bgzf.c ├── bgzf.h ├── bgzip.c ├── binary.yaml ├── cut_target.c ├── deploy.yaml ├── errmod.c ├── errmod.h ├── examples │ ├── 00README.txt │ ├── Makefile │ ├── bam2bed.c │ ├── calDepth.c │ ├── chk_indel.c │ ├── ex1.fa │ ├── ex1.sam.gz │ ├── toy.fa │ └── toy.sam ├── faidx.c ├── faidx.h ├── kaln.c ├── kaln.h ├── khash.h ├── klist.h ├── knetfile.c ├── knetfile.h ├── kprobaln.c ├── kprobaln.h ├── kseq.h ├── ksort.h ├── kstring.c ├── kstring.h ├── misc │ ├── HmmGlocal.java │ ├── Makefile │ ├── ace2sam.c │ ├── bamcheck.c │ ├── blast2sam.pl │ ├── bowtie2sam.pl │ ├── export2sam.pl │ ├── interpolate_sam.pl │ ├── maq2sam.c │ ├── md5.c │ ├── md5.h │ ├── md5fa.c │ ├── novo2sam.pl │ ├── plot-bamcheck │ ├── psl2sam.pl │ ├── r2plot.lua │ ├── sam2vcf.pl │ ├── samtools.pl │ ├── soap2sam.pl │ ├── varfilter.py │ ├── vcfutils.lua │ ├── wgsim.c │ ├── wgsim_eval.pl │ └── zoom2sam.pl ├── padding.c ├── phase.c ├── razf.c ├── razf.h ├── razip.c ├── sam.c ├── sam.h ├── sam_header.c ├── sam_header.h ├── sam_view.c ├── sample.c ├── sample.h ├── samtools.1 ├── version.yaml └── win32 │ ├── xcurses.h │ ├── zconf.h │ └── zlib.h └── tabix ├── .gitignore ├── ChangeLog ├── Makefile ├── NEWS ├── TabixReader.java ├── bam_endian.h ├── bedidx.c ├── bgzf.c ├── bgzf.h ├── bgzip.c ├── binary.yaml ├── deploy.yaml ├── example.gtf.gz ├── example.gtf.gz.tbi ├── index.c ├── khash.h ├── knetfile.c ├── knetfile.h ├── kseq.h ├── ksort.h ├── kstring.c ├── kstring.h ├── main.c ├── perl ├── MANIFEST ├── Makefile.PL ├── Tabix.pm ├── Tabix.xs ├── TabixIterator.pm ├── t │ ├── 01local.t │ └── 02remote.t └── typemap ├── python ├── setup.py ├── tabixmodule.c └── test.py ├── tabix.1 ├── tabix.h ├── tabix.py ├── tabix.tex └── version.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | build 3 | 4 | scripts/version.pyc 5 | .coverage 6 | /build/dist/ 7 | /projects/*/dist/ 8 | 9 | # binary/compiled files 10 | /bin/ 11 | /lib/ 12 | *.o 13 | 
performance_testing/vcs 14 | tags 15 | /source-package/ 16 | /dependencies/ 17 | /env-wecall 18 | 19 | # logging 20 | *.log 21 | 22 | # valgrind 23 | callgrind.out.* 24 | 25 | # editor temp files 26 | *~ 27 | *.kate-swp 28 | *# 29 | *.swp 30 | 31 | # user project configs 32 | .idea 33 | cpp/.idea 34 | 35 | # packaging 36 | /deployable 37 | /python/dist 38 | /python/build 39 | /test-drivers/dist 40 | /test-drivers/build 41 | /properties.json 42 | .pytest_cache/ 43 | 44 | # report folder 45 | /report 46 | /results 47 | 48 | # Third party tools 49 | /third-party-tools 50 | 51 | cpp/cmake-build-*e* 52 | cpp/src/version/version.cpp 53 | README.txt 54 | ./Licences.txt 55 | wecall-*.tar.gz 56 | conda-bld 57 | weCall-userguide.pdf 58 | /Licences.txt 59 | 60 | *.deb 61 | *.rpm 62 | *.egg-info 63 | weCall-Linux-x86_64.tar.gz 64 | environment_production.yaml 65 | environment_test.yaml 66 | /doc/wecall-params.tex 67 | __pycache__ 68 | test-results 69 | xunit-status.xml 70 | .cache 71 | *.py.bak 72 | .pyest.cache 73 | wecall-env 74 | 75 | #vendor noise 76 | vendor/samtools/bcftools/bcftools 77 | vendor/samtools/misc/ace2sam 78 | vendor/samtools/misc/bamcheck 79 | vendor/samtools/misc/maq2sam-long 80 | vendor/samtools/misc/maq2sam-short 81 | vendor/samtools/misc/md5fa 82 | vendor/samtools/misc/md5sum-lite 83 | vendor/samtools/misc/wgsim 84 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: linux 2 | dist: trusty 3 | sudo: required 4 | addons: 5 | apt: 6 | packages: 7 | - lcov 8 | - astyle 9 | - devscripts 10 | - python3 11 | - python3-pip 12 | - python3-dev 13 | - python3.4-venv 14 | - libboost-all-dev 15 | - libncurses5-dev 16 | - zlib1g-dev 17 | - doxygen 18 | - fakeroot 19 | - debhelper 20 | - pkg-config 21 | - alien 22 | - rpm 23 | - dh-make 24 | 25 | matrix: 26 | include: 27 | - language: c 28 | script: 29 | - make wecall test-unit 
test-acceptance 30 | -------------------------------------------------------------------------------- /RELEASE: -------------------------------------------------------------------------------- 1 | 1 2 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 2.0.1 2 | -------------------------------------------------------------------------------- /cpp/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -4 5 | ConstructorInitializerIndentWidth: 4 6 | AlignEscapedNewlinesLeft: true 7 | AlignTrailingComments: true 8 | AllowAllParametersOfDeclarationOnNextLine: false 9 | AllowShortBlocksOnASingleLine: true 10 | AllowShortIfStatementsOnASingleLine: false 11 | AllowShortLoopsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: All 13 | AlwaysBreakTemplateDeclarations: true 14 | AlwaysBreakBeforeMultilineStrings: true 15 | BreakBeforeBinaryOperators: false 16 | BreakBeforeTernaryOperators: true 17 | BreakConstructorInitializersBeforeComma: false 18 | BinPackParameters: false 19 | ColumnLimit: 120 20 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 21 | DerivePointerAlignment: false 22 | IndentCaseLabels: false 23 | IndentWrappedFunctionNames: false 24 | IndentFunctionDeclarationAfterType: false 25 | MaxEmptyLinesToKeep: 1 26 | KeepEmptyLinesAtTheStartOfBlocks: true 27 | NamespaceIndentation: Inner 28 | PenaltyBreakBeforeFirstCallParameter: 1 29 | PenaltyBreakComment: 300 30 | PenaltyBreakString: 1000 31 | PenaltyBreakFirstLessLess: 120 32 | PenaltyExcessCharacter: 1000000 33 | PenaltyReturnTypeOnItsOwnLine: 200 34 | PointerAlignment: Middle 35 | SpacesBeforeTrailingComments: 2 36 | Cpp11BracedListStyle: true 37 | Standard: Cpp11 38 | IndentWidth: 4 39 | TabWidth: 4 40 | UseTab: Never 41 | AllowShortBlocksOnASingleLine: 
true 42 | BreakBeforeBraces: Allman 43 | SpacesInParentheses: true 44 | SpacesInAngles: true 45 | SpaceInEmptyParentheses: false 46 | SpacesInCStyleCastParentheses: false 47 | SpacesInContainerLiterals: false 48 | SpaceBeforeAssignmentOperators: true 49 | ContinuationIndentWidth: 4 50 | CommentPragmas: '' 51 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 52 | SpaceBeforeParens: ControlStatements 53 | DisableFormat: false 54 | ... 55 | -------------------------------------------------------------------------------- /cpp/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_library( 4 | name = "libgenomics.so", 5 | srcs = ["BUILD"] + glob([ 6 | "src/**/*.cpp" 7 | ]), 8 | hdrs = ["BUILD"] + glob([ 9 | "src/**/*.hpp", 10 | ]), 11 | linkshared = 1, 12 | ) 13 | 14 | cc_binary( 15 | name = "weCall", 16 | srcs = ["src/weCall.cpp"], 17 | deps = [":libgenomics.so"], 18 | ) 19 | 20 | filegroup( 21 | name = "srcs", 22 | srcs = ["BUILD"] + glob([ 23 | "**/*.cpp", 24 | "**/*.hpp", 25 | ]), 26 | ) 27 | -------------------------------------------------------------------------------- /cpp/WORKSPACE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/cpp/WORKSPACE -------------------------------------------------------------------------------- /cpp/src/alignment/align.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef ALIGN_HPP 3 | #define ALIGN_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace wecall 11 | { 12 | namespace alignment 13 | { 14 | using errorModel_t = std::vector< int16_t >; 15 | using localGapOpenPenalties_t = errorModel_t; 16 | 17 | int needlemanWunschAlignment( std::string::const_iterator haplotype, 18 | 
std::string::const_iterator readSeq, 19 | const char * readQual, 20 | const unsigned int haplotypeLength, 21 | const unsigned int readLength, 22 | const unsigned short gapextend, 23 | const unsigned short nucprior, 24 | const localGapOpenPenalties_t & localgapopen, 25 | char * aln1, 26 | char * aln2, 27 | int * const firstpos, 28 | const int o ///< offset 29 | ); 30 | } 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /cpp/src/alignment/alignScorer.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef ALIGN_SCORER_HPP 3 | #define ALIGN_SCORER_HPP 4 | 5 | namespace wecall 6 | { 7 | /// All alignment algorithms and associated classes live in 8 | /// this namespace 9 | namespace alignment 10 | { 11 | ///---------------------------------------------------------------------------------------- 12 | /// Functor-style class to score alignment matches and gaps. 13 | ///---------------------------------------------------------------------------------------- 14 | class AlignScorer 15 | { 16 | public: 17 | AlignScorer() = delete; 18 | explicit AlignScorer( const int gapPenalty ) : m_gapPenalty( gapPenalty ) {} 19 | explicit AlignScorer( const AlignScorer & rhs ) = delete; 20 | 21 | int scoreMatch( const char x, const char y ) const { return x == y ? 
1 : -1; } 22 | int scoreGap() const { return m_gapPenalty; } 23 | 24 | private: 25 | const int m_gapPenalty; /// Gap extension penalty 26 | }; 27 | } 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cpp/src/alignment/aligner.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef ALIGNER_HPP 3 | #define ALIGNER_HPP 4 | 5 | #include "common.hpp" 6 | 7 | namespace wecall 8 | { 9 | namespace io 10 | { 11 | class Read; 12 | } // namespace io 13 | 14 | namespace mapping 15 | { 16 | class HashMapper; 17 | } // namespace mapping 18 | 19 | namespace alignment 20 | { 21 | class GAlign; 22 | 23 | //----------------------------------------------------------------------------------------- 24 | 25 | double computeLikelihoodForReadAndHaplotype( const io::Read & theRead, 26 | const int64_t hintPosition, 27 | const mapping::HashMapper & mapper, 28 | const alignment::GAlign & aligner ); 29 | //----------------------------------------------------------------------------------------- 30 | } 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /cpp/src/alignment/galign.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef GALIGN_HPP 3 | #define GALIGN_HPP 4 | 5 | #include 6 | #include 7 | #include "utils/interval.hpp" 8 | #include "utils/sequence.hpp" 9 | #include "align.hpp" 10 | 11 | namespace wecall 12 | { 13 | namespace alignment 14 | { 15 | localGapOpenPenalties_t computeGapOpen( const utils::BasePairSequence & haplotypeSequence, 16 | const errorModel_t & errorModel ); 17 | 18 | utils::Interval allowableStartPositionsForAlignment( const int64_t haplotypeLength, 19 | const int64_t readLength, 20 | const int paddingLength ); 21 | 22 | class GAlign 23 | { 24 | public: 25 | GAlign( 
const utils::BasePairSequence & haplotypeSequence, 26 | const unsigned short gapExtend, 27 | const unsigned short nucleotidePrior, 28 | const localGapOpenPenalties_t & localGapOpen ); 29 | 30 | int computeAlignmentPhredScore( const utils::BasePairSequence & readSeq, 31 | const utils::QualitySequence & qual, 32 | const int pos, 33 | char * aln1 = NULL, 34 | char * aln2 = NULL ) const; 35 | 36 | private: 37 | const utils::BasePairSequence m_haplotypeSequence; 38 | const unsigned short m_gapExtend; 39 | const unsigned short m_nucPrior; 40 | const localGapOpenPenalties_t m_localGapOpen; 41 | }; 42 | } 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /cpp/src/assembly/node.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include 3 | #include 4 | #include 5 | 6 | #include "assembly/node.hpp" 7 | #include 8 | 9 | namespace wecall 10 | { 11 | namespace assembly 12 | { 13 | 14 | void Node::addInEdge( const std::shared_ptr< Node > & inEdge ) 15 | { 16 | const auto priorCharacter = inEdge->firstCharacter(); 17 | m_inEdges.insert( priorCharacter ); 18 | } 19 | 20 | //----------------------------------------------------------------------------------------- 21 | 22 | void Node::addOutEdge( const std::shared_ptr< Node > & outEdge, const std::size_t support ) 23 | { 24 | const auto & nextCharacter = outEdge->lastCharacter(); 25 | m_outEdges[nextCharacter] = outEdge; 26 | m_outCount[nextCharacter] += support; 27 | } 28 | 29 | std::vector< std::shared_ptr< Node > > Node::getSuccessors() const 30 | { 31 | std::vector< std::shared_ptr< Node > > successors; 32 | successors.reserve( m_outEdges.size() ); 33 | for ( const auto & pair : m_outEdges ) 34 | { 35 | successors.push_back( pair.second.lock() ); 36 | } 37 | return successors; 38 | } 39 | } 40 | } 41 | 
-------------------------------------------------------------------------------- /cpp/src/caller/callSet.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "caller/callSet.hpp" 3 | 4 | namespace wecall 5 | { 6 | namespace caller 7 | { 8 | /// Returns true as soon as any call in any entry of genoCalls equals Call::VAR, false if none does. 9 | bool variantCalled( const genoCalls_t & genoCalls ) 10 | { 11 | for ( const auto & genoCall : genoCalls ) 12 | { 13 | for ( const auto & call : genoCall ) 14 | { 15 | if ( call == Call::VAR ) 16 | { 17 | return true; 18 | } 19 | } 20 | } 21 | return false; 22 | } 23 | 24 | /// Returns the smallest zeroIndexedVcfPosition() among the calls that are not reference calls, 25 | /// or -1 when there is no such call (which includes an empty calls vector). 26 | int64_t zeroIndexedVCFPosition( const callVector_t & calls ) 27 | { 28 | auto position = std::numeric_limits< int64_t >::max(); 29 | auto variantExists = false; 30 | for ( const auto & call : calls ) 31 | { 32 | if ( not call.isRefCall() ) 33 | { 34 | variantExists = true; 35 | position = std::min( call.var->zeroIndexedVcfPosition(), position ); 36 | } 37 | } 38 | return variantExists ? position : -1; 39 | } 40 | 41 | //----------------------------------------------------------------------------------------- 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /cpp/src/caller/candidateVariantBank.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include 3 | 4 | #include "utils/sequence.hpp" 5 | #include "vcf/reader.hpp" 6 | #include "caller/region.hpp" 7 | #include "candidateVariantBank.hpp" 8 | 9 | namespace wecall 10 | { 11 | namespace caller 12 | { 13 | 14 | //----------------------------------------------------------------------------------------- 15 | 16 | namespace 17 | { 18 | struct isAlleleFrequency 19 | { 20 | bool operator()( const std::pair< std::string, std::vector< std::string > > & item ) 21 | { 22 | return item.first == 
std::string( "AF" ); 23 | } 24 | }; 25 | } 26 | 27 | //----------------------------------------------------------------------------------------- 28 | 29 | std::vector< double > getPriorsFromInfo( const vcf::Info & info ) 30 | { 31 | const auto foundPriors = std::find_if( info.begin(), info.end(), isAlleleFrequency() ); 32 | 33 | if ( foundPriors == info.end() ) 34 | { 35 | return std::vector< double >(); 36 | } 37 | else 38 | { 39 | const auto & strPriors = foundPriors->second; 40 | std::vector< double > doublePriors( foundPriors->second.size() ); 41 | 42 | std::transform( strPriors.begin(), strPriors.end(), doublePriors.begin(), []( const std::string & val ) 43 | -> double 44 | { 45 | return std::stod( val ); 46 | } ); 47 | 48 | return doublePriors; 49 | } 50 | } 51 | 52 | //----------------------------------------------------------------------------------------- 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /cpp/src/caller/candidateVariantBank.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef CANDIDATE_VARIANT_BANK_HPP 3 | #define CANDIDATE_VARIANT_BANK_HPP 4 | 5 | #include 6 | 7 | #include "caller/region.hpp" 8 | #include "utils/logging.hpp" 9 | #include "variant/type/variant.hpp" 10 | #include "vcf/record.hpp" 11 | #include "io/fastaFile.hpp" 12 | 13 | namespace wecall 14 | { 15 | namespace caller 16 | { 17 | std::vector< double > getPriorsFromInfo( const vcf::Info & info ); 18 | } 19 | } 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /cpp/src/caller/diploid/genotypeUtils.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef GENOTYPE_UTILS_HPP 3 | #define GENOTYPE_UTILS_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include "utils/matrix.hpp" 9 | #include 
"variant/genotype.hpp" 10 | 11 | namespace wecall 12 | { 13 | namespace caller 14 | { 15 | namespace model 16 | { 17 | template < typename Sequence > 18 | void rescaleLogLikelihoods( Sequence & sequence, bool hasMultipleReads ) 19 | { 20 | if ( hasMultipleReads ) 21 | { 22 | const auto maxLogLikelihood = *std::max_element( sequence.begin(), sequence.end() ); 23 | for ( auto it = sequence.begin(); it != sequence.end(); ++it ) 24 | { 25 | *it = log( std::max( std::numeric_limits< double >::min(), exp( *it - maxLogLikelihood ) ) ); 26 | } 27 | } 28 | else 29 | { 30 | for ( auto it = sequence.begin(); it != sequence.end(); ++it ) 31 | { 32 | *it = 1.0; 33 | } 34 | } 35 | } 36 | 37 | std::vector< double > computeGenotypeLikelihoods( const variant::GenotypeVector & genotypes, 38 | const utils::matrix_t & probReadsGivenHaplotypes, 39 | const variant::HaplotypeVector & haplotypes ); 40 | } 41 | } 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /cpp/src/caller/diploid/referenceCalling.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef WECALL_REFERENCECALLING_H 3 | #define WECALL_REFERENCECALLING_H 4 | 5 | #include "caller/callSet.hpp" 6 | #include "io/readRange.hpp" 7 | 8 | namespace wecall 9 | { 10 | namespace caller 11 | { 12 | namespace model 13 | { 14 | std::vector< Call > buildRefCall( caller::Region refInterval, 15 | const io::perSampleRegionsReads_t & reads, 16 | double maxUncalledVarQ, 17 | const std::vector< std::size_t > & ploidy, 18 | const double readQualityDeltaThreshold ); 19 | 20 | double getQualityFromCoverage( const int64_t minCoverage ); 21 | double getRefQFromCoverageRow( const int64_t minCoverage ); 22 | } 23 | } 24 | } 25 | 26 | #endif // WECALL_REFERENCECALLING_H 27 | -------------------------------------------------------------------------------- /cpp/src/caller/haplotypeLikelihoods.hpp: 
-------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef HAPLOTYPE_LIKELIHOODS_HPP 3 | #define HAPLOTYPE_LIKELIHOODS_HPP 4 | 5 | #include "utils/matrix.hpp" 6 | #include "variant/haplotype.hpp" 7 | #include "io/readRange.hpp" 8 | #include "alignment/galign.hpp" 9 | 10 | namespace wecall 11 | { 12 | namespace caller 13 | { 14 | namespace errorModels 15 | { 16 | const wecall::alignment::errorModel_t illuminaErrorModel = { 17 | 45, 42, 41, 39, 37, 32, 28, 23, 20, 19, 17, 16, 15, 14, 13, 12, 11, 11, 10, 9, 9, 8, 8, 7, 7, 18 | 7, 6, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1}; 19 | } 20 | 21 | utils::matrix_t computeHaplotypeLikelihoods( const variant::HaplotypeVector & haplotypes, 22 | const io::RegionsReads & readRange ); 23 | 24 | std::vector< double > computeHaplotypeFrequencies( const utils::matrix_t & haplotypeLikelihoods ); 25 | } 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /cpp/src/caller/jobReduce.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef JOB_REDUCE_HPP 3 | #define JOB_REDUCE_HPP 4 | 5 | #include 6 | #include 7 | 8 | #include "utils/timer.hpp" 9 | #include "caller/params.hpp" 10 | 11 | namespace wecall 12 | { 13 | namespace caller 14 | { 15 | class JobReduce 16 | { 17 | public: 18 | JobReduce( const caller::params::Reduce & reduceParams ); 19 | 20 | void process(); 21 | 22 | private: 23 | void writeRecords( std::ofstream & out ) const; 24 | void writeHeader( std::ofstream & out ) const; 25 | void cleanUp() const; 26 | 27 | private: 28 | const caller::params::Reduce m_reduceParams; 29 | std::vector< boost::filesystem::path > m_inputVCFFilePaths; 30 | utils::timerPtr_t m_timer; 31 | }; 32 | } 33 | } 34 | 35 | #endif 36 | 
/// Identity overload so that an unqualified to_string( value ) call also accepts a value that
/// is already a std::string (std::to_string only has arithmetic overloads).
///
/// Declared inline because this definition lives in a header: a non-inline definition would be
/// emitted by every translation unit that includes it and fail at link time with duplicate
/// symbols.
///
/// @param theString  The string to return.
/// @return A copy of theString.
inline std::string to_string( const std::string & theString ) { return theString; }
/cpp/src/io/bedFile.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef BEDFILE_HPP 3 | #define BEDFILE_HPP 4 | 5 | #include 6 | #include 7 | 8 | #include "utils/timer.hpp" 9 | #include "caller/region.hpp" 10 | #include "io/fastaFile.hpp" 11 | 12 | namespace wecall 13 | { 14 | namespace io 15 | { 16 | class BedFile 17 | { 18 | public: 19 | explicit BedFile( std::string fileName ); 20 | 21 | caller::regions_t getRegions() const; 22 | 23 | private: 24 | std::vector< std::string > parseRegionLine( std::string line, bool insideHeader, size_t lineNumber ) const; 25 | 26 | caller::regions_t readRegionsFromStream( std::istream & inStream ) const; 27 | 28 | private: 29 | std::string m_filename; 30 | utils::timerPtr_t m_timer; 31 | }; 32 | } 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cpp/src/io/readIntervalTree.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef READ_INTERVAL_TREE_HPP 3 | #define READ_INTERVAL_TREE_HPP 4 | 5 | #include "io/read.hpp" 6 | #include "utils/intervalTree.hpp" 7 | 8 | #include 9 | 10 | namespace wecall 11 | { 12 | namespace io 13 | { 14 | class ReadAlignedEndPosComp; 15 | class ReadStartPosComp; 16 | /// interval tree typedefs. Issues when int replaced with int64_t in readIntervalTree_t 17 | using readIntervalTree_t = utils::IntervalTree< Read, ReadStartPosComp, ReadAlignedEndPosComp >; 18 | using readIt_t = readIntervalTree_t::iterator; 19 | 20 | /// Utility class for use ordering internal multisets in interval trees. 
21 | /// Note:- Reverse ordering on EndPos 22 | class ReadAlignedEndPosComp 23 | { 24 | public: 25 | bool operator()( readPtr_t left, readPtr_t right ) const 26 | { 27 | return this->operator()( left ) > this->operator()( right ); 28 | } 29 | 30 | int64_t operator()( const readPtr_t & readPtr ) const 31 | { 32 | return readPtr->getAlignedEndPos() + readPtr->getLengthAfterAlignedEndPos(); 33 | } 34 | }; 35 | 36 | /// Utility class for use ordering interal multiset for start position in interval tree 37 | class ReadStartPosComp 38 | { 39 | public: 40 | bool operator()( const readPtr_t & left, const readPtr_t & right ) const 41 | { 42 | return this->operator()( left ) < this->operator()( right ); 43 | } 44 | 45 | int64_t operator()( const readPtr_t & readPtr ) const 46 | { 47 | return readPtr->getStartPos() - readPtr->getLengthBeforeAlignedStartPos(); 48 | } 49 | }; 50 | } // io 51 | } // wecall 52 | 53 | #endif // WECALL_READINTERVALTREE_HPP_H 54 | -------------------------------------------------------------------------------- /cpp/src/io/readUtils.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include 3 | #include "io/readUtils.hpp" 4 | 5 | namespace wecall 6 | { 7 | namespace io 8 | { 9 | namespace read 10 | { 11 | phred_t minBaseQualityInReadAroundInterval( const Read & read, 12 | const utils::Interval & interval, 13 | const int64_t padding ) 14 | { 15 | const auto intervalInReadSpace = read.getIntervalInRead( interval ); 16 | const auto paddedInterval = intervalInReadSpace.getPadded( padding ); 17 | const auto intersectPadded = paddedInterval.getIntersect( utils::Interval( 0L, read.getLength() ) ); 18 | 19 | const auto & qualities = read.getQualities(); 20 | 21 | const auto min_element = std::min_element( qualities.cbegin() + intersectPadded.start(), 22 | qualities.cbegin() + intersectPadded.end() ); 23 | 24 | if ( min_element == qualities.cend() ) 25 | { 26 | return 
0; 27 | } 28 | else 29 | { 30 | return static_cast< phred_t >( *min_element ); 31 | } 32 | } 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /cpp/src/io/readUtils.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef READ_UTILS_HPP 3 | #define READ_UTILS_HPP 4 | 5 | #include "common.hpp" 6 | #include "io/read.hpp" 7 | 8 | namespace wecall 9 | { 10 | namespace io 11 | { 12 | namespace read 13 | { 14 | phred_t minBaseQualityInReadAroundInterval( const Read & read, 15 | const utils::Interval & interval, 16 | const int64_t padding ); 17 | } 18 | } 19 | } 20 | #endif -------------------------------------------------------------------------------- /cpp/src/io/readfilters/baseQualityFilter.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "io/readfilters/baseQualityFilter.hpp" 3 | #include 4 | 5 | namespace wecall 6 | { 7 | namespace io 8 | { 9 | //----------------------------------------------------------------------------------------- 10 | 11 | bool BaseQualFilter::passesFilter_impl( const io::Read & theRead ) 12 | { 13 | const auto & quals = theRead.getQualities(); 14 | int count = 0; 15 | 16 | for ( int theQual : quals ) 17 | { 18 | if ( theQual > this->m_qualThreshold ) 19 | { 20 | ++count; 21 | 22 | if ( count > this->m_minBases ) 23 | { 24 | return true; 25 | } 26 | } 27 | } 28 | 29 | return false; 30 | } 31 | 32 | //----------------------------------------------------------------------------------------- 33 | 34 | std::string BaseQualFilter::toString() const 35 | { 36 | std::stringstream repr; 37 | repr << "BaseQualFilter(Threshold = " << m_qualThreshold << ", MinBases = " << m_minBases << ")"; 38 | return repr.str(); 39 | } 40 | 41 | 
//----------------------------------------------------------------------------------------- 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/baseQualityFilter.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef BASEQUAL_FILTER_HPP 3 | #define BASEQUAL_FILTER_HPP 4 | 5 | #include "io/readfilters/readFilter.hpp" 6 | 7 | namespace wecall 8 | { 9 | namespace io 10 | { 11 | /// Filter which checks the base qualities of the specified read. 12 | class BaseQualFilter : public ReadFilter 13 | { 14 | public: 15 | /// Constructor. 16 | /// 17 | /// @param threshold The minimum acceptable base quality value 18 | /// @param minBases The minimum number of bases that must have Qual > threshold 19 | BaseQualFilter( int threshold, int minBases ) : m_qualThreshold( threshold ), m_minBases( minBases ) {} 20 | 21 | /// Destructor 22 | virtual ~BaseQualFilter(){}; 23 | 24 | /// Convert to string representation 25 | std::string toString() const override; 26 | 27 | private: 28 | virtual bool passesFilter_impl( const io::Read & theRead ) override; 29 | int m_qualThreshold; /// Min value for base quality. 
30 | int m_minBases; /// This many bases must have higher qual than threshold 31 | }; 32 | } 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/booleanFilter.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "io/readfilters/booleanFilter.hpp" 3 | #include 4 | #include 5 | 6 | namespace wecall 7 | { 8 | namespace io 9 | { 10 | //----------------------------------------------------------------------------------------- 11 | 12 | BooleanFilter::BooleanFilter( std::function< const bool(const Read &)> theFunction, 13 | const std::string & filterName, 14 | const bool negate ) 15 | : m_function( theFunction ), m_filterName( filterName ), m_negate( negate ) 16 | { 17 | } 18 | 19 | //----------------------------------------------------------------------------------------- 20 | 21 | bool BooleanFilter::passesFilter_impl( const io::Read & theRead ) 22 | { 23 | if ( not this->m_negate ) 24 | { 25 | return this->m_function( theRead ); 26 | } 27 | else 28 | { 29 | return ( not this->m_function( theRead ) ); 30 | } 31 | } 32 | 33 | //----------------------------------------------------------------------------------------- 34 | 35 | std::string BooleanFilter::toString() const 36 | { 37 | std::stringstream repr; 38 | repr << this->m_filterName << "()"; 39 | return repr.str(); 40 | } 41 | 42 | //----------------------------------------------------------------------------------------- 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/booleanFilter.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef BOOLEAN_FILTER_HPP 3 | #define BOOLEAN_FILTER_HPP 4 | 5 | #include "io/readfilters/readFilter.hpp" 6 | #include 7 | 8 | namespace wecall 9 | 
{ 10 | namespace io 11 | { 12 | /// Generic filter which takes a boolean function object and evaluates it for each 13 | /// read. 14 | class BooleanFilter : public ReadFilter 15 | { 16 | public: 17 | /// Constructor. 18 | /// 19 | /// @param theFunction. A function which takes read and returns a boolean 20 | /// @param filterName The name of this filter 21 | /// @param negate If true, use the negation of the supplied functions return value to evaluate pass/fail 22 | BooleanFilter( std::function< const bool(const Read &)> theFunction, 23 | const std::string & filterName, 24 | const bool negate = false ); 25 | 26 | virtual ~BooleanFilter(){}; 27 | std::string toString() const override; 28 | 29 | private: 30 | virtual bool passesFilter_impl( const io::Read & theRead ) override; 31 | 32 | std::function< const bool(const Read &)> m_function; /// The function to evaluate 33 | const std::string m_filterName; /// The name of this filter (mainly for reporting/debugging) 34 | const bool m_negate; /// If true, return the negation of the function 35 | }; 36 | } 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/mapQualityFilter.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "io/readfilters/mapQualityFilter.hpp" 3 | #include 4 | 5 | namespace wecall 6 | { 7 | namespace io 8 | { 9 | //----------------------------------------------------------------------------------------- 10 | 11 | bool MapQualFilter::passesFilter_impl( const io::Read & theRead ) 12 | { 13 | return theRead.getMappingQuality() >= m_threshold; 14 | } 15 | 16 | //----------------------------------------------------------------------------------------- 17 | 18 | std::string MapQualFilter::toString() const 19 | { 20 | std::stringstream repr; 21 | repr << "MapQualFilter(Threshold = " << m_threshold << ")"; 22 | return repr.str(); 23 | 
} 24 | 25 | //----------------------------------------------------------------------------------------- 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/mapQualityFilter.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef MAPQUAL_FILTER_HPP 3 | #define MAPQUAL_FILTER_HPP 4 | 5 | #include "io/readfilters/readFilter.hpp" 6 | 7 | namespace wecall 8 | { 9 | namespace io 10 | { 11 | /// Filter which simply checks the mapping quality of the specified read. 12 | class MapQualFilter : public ReadFilter 13 | { 14 | public: 15 | /// Constructor. 16 | /// 17 | /// @param threshold the minimum acceptable value for mapping quality. 18 | explicit MapQualFilter( int64_t threshold ) : m_threshold( threshold ) {} 19 | 20 | /// Destructor 21 | virtual ~MapQualFilter() {} 22 | 23 | /// Convert to string representation 24 | virtual std::string toString() const override; 25 | 26 | private: 27 | virtual bool passesFilter_impl( const io::Read & theRead ) override; 28 | int64_t m_threshold; /// Min value for mapping quality. 
29 | }; 30 | } 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/readFilter.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "io/readfilters/readFilter.hpp" 3 | 4 | namespace wecall 5 | { 6 | namespace io 7 | { 8 | //----------------------------------------------------------------------------------------- 9 | 10 | bool ReadFilter::passesFilter( const Read & theRead ) { return this->passesFilter_impl( theRead ); } 11 | 12 | //----------------------------------------------------------------------------------------- 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/readFilter.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef READ_FILTER_HPP 3 | #define READ_FILTER_HPP 4 | 5 | #include "common.hpp" 6 | #include "io/read.hpp" 7 | 8 | #include 9 | 10 | namespace wecall 11 | { 12 | namespace io 13 | { 14 | /// Base class for read filters. 15 | class ReadFilter 16 | { 17 | public: 18 | /// Constructor 19 | ReadFilter() {} 20 | 21 | /// Destructor 22 | virtual ~ReadFilter() {} 23 | 24 | /// Returns a string representation of the filter instance 25 | virtual std::string toString() const = 0; 26 | 27 | /// Return true if the read passes this filter. Otherwise false. 
28 | /// 29 | /// @param A single read to check 30 | /// @return The result 31 | bool passesFilter( const io::Read & theRead ); 32 | 33 | private: 34 | /// Virtual function that implements the filter 35 | virtual bool passesFilter_impl( const io::Read & theRead ) = 0; 36 | }; 37 | 38 | using ReadFilterPtr_t = std::shared_ptr< ReadFilter >; 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/readFilterAndTrimmer.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef READ_FILTER_AND_TRIMMER_HPP 3 | #define READ_FILTER_AND_TRIMMER_HPP 4 | 5 | #include 6 | 7 | #include "io/read.hpp" 8 | #include "common.hpp" 9 | #include "caller/params.hpp" 10 | #include "io/readfilters/readFilter.hpp" 11 | 12 | namespace wecall 13 | { 14 | namespace io 15 | { 16 | /// Manages all read filters and trimmers that are applied to individual reads during reading 17 | class ReadFilterAndTrimmer 18 | { 19 | public: 20 | ReadFilterAndTrimmer( const caller::params::Filters & filterParams ); 21 | 22 | /// Trim the read and return true if passed all filters and the trimmed read has length > 0 23 | bool trimAndFilter( readPtr_t read ) const; 24 | 25 | private: 26 | void trim( readPtr_t read ) const; 27 | bool passesFilters( readPtr_t read ) const; 28 | bool hasLength( readPtr_t read ) const; 29 | bool isSimilarToPrevious( readPtr_t read ) const; 30 | 31 | std::vector< ReadFilterPtr_t > m_filters; 32 | 33 | bool m_overlapTrim; 34 | bool m_shortReadTrim; 35 | bool m_noSimilarReads; 36 | 37 | mutable readPtr_t m_previousRead = nullptr; 38 | }; 39 | } 40 | } 41 | 42 | #endif // READ_FILTERS_MANAGER_HPP 43 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/shortReadFilter.cpp: -------------------------------------------------------------------------------- 1 | // All 
content Copyright (C) 2018 Genomics plc 2 | #include "io/readfilters/shortReadFilter.hpp" 3 | #include 4 | #include 5 | 6 | namespace wecall 7 | { 8 | namespace io 9 | { 10 | //----------------------------------------------------------------------------------------- 11 | 12 | ShortReadFilter::ShortReadFilter() {} 13 | 14 | //----------------------------------------------------------------------------------------- 15 | 16 | bool ShortReadFilter::passesFilter_impl( const io::Read & theRead ) 17 | { 18 | const auto insertSize = std::abs( theRead.getInsertSize() ); 19 | const auto readLengthNoSoftClipping = theRead.cigar().lengthInSeqWithoutSoftClipping(); 20 | 21 | return insertSize >= readLengthNoSoftClipping; 22 | } 23 | 24 | //----------------------------------------------------------------------------------------- 25 | 26 | std::string ShortReadFilter::toString() const 27 | { 28 | std::stringstream repr; 29 | repr << "ShortReadFilter"; 30 | return repr.str(); 31 | } 32 | 33 | //----------------------------------------------------------------------------------------- 34 | } 35 | } 36 | 37 | // std::function f1 = &Read::getInsertSize; 38 | // std::function f2 = &Read::getLength; 39 | // std::function f3 = [f1, f2](const Read& theRead){ return f1(theRead) < f2(theRead); 40 | // }; 41 | -------------------------------------------------------------------------------- /cpp/src/io/readfilters/shortReadFilter.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef SHORTREAD_FILTER_HPP 3 | #define SHORTREAD_FILTER_HPP 4 | 5 | #include "io/readfilters/readFilter.hpp" 6 | 7 | namespace wecall 8 | { 9 | namespace io 10 | { 11 | /// Simply filters out reads where fragment < 1 read length 12 | class ShortReadFilter : public ReadFilter 13 | { 14 | public: 15 | /// Constructor 16 | /// 17 | ShortReadFilter(); 18 | virtual ~ShortReadFilter() {} 19 | std::string toString() const override; 
20 | 21 | private: 22 | virtual bool passesFilter_impl( const io::Read & theRead ) override; 23 | }; 24 | } 25 | } 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /cpp/src/io/tabixFile.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include 3 | #include 4 | 5 | #include "caller/region.hpp" 6 | 7 | #include 8 | 9 | namespace wecall 10 | { 11 | namespace io 12 | { 13 | const std::string headerPrefix = "#"; 14 | 15 | class TabixFile 16 | { 17 | public: 18 | explicit TabixFile( std::string filename, std::string indexFilename ); 19 | 20 | ~TabixFile(); 21 | 22 | std::vector< std::string > header() const { return m_headerLines; } 23 | std::vector< std::string > fetch( const caller::Region & region ) const; 24 | 25 | private: 26 | void readHeader(); 27 | 28 | tabix_t * m_tabixFile; 29 | std::vector< std::string > m_headerLines; 30 | }; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /cpp/src/io/tabixVCFFile.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include 3 | #include 4 | #include "io/tabixFile.hpp" 5 | #include "vcf/record.hpp" 6 | #include "vcf/filterDescription.hpp" 7 | #include "caller/region.hpp" 8 | 9 | namespace wecall 10 | { 11 | namespace io 12 | { 13 | using vcfMetaInformation_t = std::map< std::string, std::string >; 14 | class TabixVCFFile 15 | { 16 | public: 17 | explicit TabixVCFFile( std::string filename, std::string indexFilename ); 18 | 19 | ~TabixVCFFile(); 20 | 21 | std::vector< vcf::Record > fetch( const caller::Region & region ) const; 22 | 23 | static bool containsFilterId( const std::set< vcf::FilterDesc > & filterDescs, const std::string & filterId ); 24 | static std::pair< std::string, std::string > parseMetaInfoLine( std::string line ); 25 | static 
vcf::FilterDesc parseFilterHeaderLine( const std::string & line ); 26 | 27 | private: 28 | void readHeader( std::string filename ); 29 | 30 | TabixFile m_tabixFile; 31 | 32 | vcfMetaInformation_t m_metaInformation; 33 | std::set< vcf::FilterDesc > m_filterDescs; 34 | }; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /cpp/src/mainpage.dox: -------------------------------------------------------------------------------- 1 | /** 2 | \mainpage The weCall Code Documentation 3 | 4 | These pages contain documentation for the **weCall** project. You can browse through the comments and source 5 | code by following the linkes above. 6 | */ 7 | -------------------------------------------------------------------------------- /cpp/src/readrecalibration/errorCorrectionParameters.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef ERROR_CORRECTION_PARAMETERS_HPP 3 | #define ERROR_CORRECTION_PARAMETERS_HPP 4 | 5 | #include 6 | #include 7 | 8 | #include "common.hpp" 9 | 10 | namespace wecall 11 | { 12 | namespace corrector 13 | { 14 | struct ErrorCorrectionParameters 15 | { 16 | using weightedDistance = std::pair< int, double >; ///< slippage distance and probability that it occurs 17 | 18 | const double pm = 0.8; ///< probability of nucleotide match in error state 19 | const double ps = 0.8; ///< proportion of nucleotide mismatches explained by slippage 20 | const double ptrue = 0.5; ///< proportion of kmers in error state that have proper q scores 21 | const double perr = 0.0005; ///< (prior) per-nuc probability of read turning into error state 22 | const double pref = 0.95; ///< probability of true kmer being reference 23 | const std::vector< weightedDistance > distances = {{-1, 0.5}, {1, 0.5}}; 24 | 25 | const char qualityFloor = 0x05; /// 6 | #include 7 | 8 | #include "io/readRange.hpp" 9 | 10 | namespace wecall 11 | { 12 | 
namespace corrector 13 | { 14 | class IntermediateOutputWriter 15 | { 16 | 17 | public: 18 | IntermediateOutputWriter( const std::vector< std::string > & inputBams, std::string outputFileStem ); 19 | 20 | void writeReads( const io::perSampleRegionsReads_t & readRangesPerSample, std::string contig ) const; 21 | 22 | private: 23 | std::string sampleFilename( std::string sampleName ) const; 24 | void writeSamHeader( std::string outputFilename, std::string headerText ) const; 25 | 26 | private: 27 | const std::string m_outputFileStem; 28 | std::map< std::string, std::string > m_sampleNameToFileMap; 29 | const bool m_writeOutputFile; 30 | }; 31 | 32 | } // namespace corrector 33 | } // namespace wecall 34 | 35 | #endif // INTERMEDIATE_OUTPUT_WRITER_HPP 36 | -------------------------------------------------------------------------------- /cpp/src/readrecalibration/kmerDistribution.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef KMER_DISTRIBUTION_HPP 3 | #define KMER_DISTRIBUTION_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "io/read.hpp" 10 | #include "io/fastaFile.hpp" 11 | 12 | #include "readrecalibration/siteKmerDistribution.hpp" 13 | 14 | namespace wecall 15 | { 16 | namespace corrector 17 | { 18 | class KmerDistribution 19 | { 20 | 21 | public: 22 | KmerDistribution( const std::string & chromosomeLabel, 23 | const io::FastaFile & fa, 24 | const int readsStart, 25 | const int readsEnd ); 26 | 27 | int start() const { return m_firstReadStart; } 28 | int end() const { return m_lastReadEnd; } 29 | 30 | void updateKmerHistogram( const io::readPtr_t readPtr ); 31 | void finalise( const double probOfTrueKmerBeingRef ); 32 | void resetErrorCountData( const double priorPerNucProbOfReadTurningIntoErrorState ); 33 | void updateErrorPosteriors(); 34 | 35 | void accumulateErrorProbability( const int pos, const double errorProbability, const bool isForward ); 36 
| 37 | const SiteKmerDistribution & getSiteKmerDistribution( const int pos ) const; 38 | SiteKmerDistribution & getSiteKmerDistribution( const int pos ); 39 | 40 | private: 41 | double computeRefPrior( const double normalisation, const double probOfTrueKmerBeingRef ); 42 | 43 | int posToIndex( const int pos ) const; 44 | 45 | private: 46 | const int m_firstReadStart; 47 | const int m_lastReadEnd; 48 | 49 | std::vector< SiteKmerDistribution > m_kmerDistribution; 50 | }; 51 | } // namespace corrector 52 | } // namespace wecall 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /cpp/src/readrecalibration/readDataForErrorPosterior.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef READ_DATA_FOR_ERROR_POSTERIOR_HPP 3 | #define READ_DATA_FOR_ERROR_POSTERIOR_HPP 4 | 5 | #include 6 | #include 7 | 8 | #include "readrecalibration/siteReadDataForErrorPosterior.hpp" 9 | #include "readrecalibration/kmerDistribution.hpp" 10 | #include "readrecalibration/errorCorrectionParameters.hpp" 11 | 12 | namespace wecall 13 | { 14 | namespace corrector 15 | { 16 | class ReadDataForErrorPosterior 17 | { 18 | public: 19 | ReadDataForErrorPosterior( wecall::io::readPtr_t read ); 20 | 21 | void calculateProbabilities( const KmerDistribution & kmerDistribution, 22 | const ErrorCorrectionParameters & errorCorrectionParameters ); 23 | 24 | void runHmm( KmerDistribution & kmerDistribution ); 25 | 26 | /** 27 | * "Recalibrate" (set to 2) the quality scores where the read is deemed to be in error mode 28 | */ 29 | void recalibrateRead(); 30 | 31 | private: 32 | double pError( int pos, const KmerDistribution & kmerDistribution ) const 33 | { 34 | return kmerDistribution.getSiteKmerDistribution( pos ).pError( m_isForward ); 35 | } 36 | 37 | private: 38 | wecall::io::readPtr_t m_readPtr; 39 | 40 | std::vector< SiteReadDataForErrorPosterior > m_readData; 
41 | bool m_isForward = false; 42 | std::size_t m_start = 0; 43 | }; 44 | } // namespace corrector 45 | } // namespace wecall 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /cpp/src/readrecalibration/readRecalibration.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef SLIP_SLIDE_HPP 3 | #define SLIP_SLIDE_HPP 4 | 5 | #include "version/version.hpp" 6 | #include "io/bamFile.hpp" 7 | #include "io/fastaFile.hpp" 8 | #include "caller/region.hpp" 9 | #include "io/read.hpp" 10 | #include "io/readRange.hpp" 11 | #include "caller/region.hpp" 12 | 13 | #include "readrecalibration/errorCorrectionParameters.hpp" 14 | #include "siteKmerDistribution.hpp" 15 | #include "readrecalibration/kmerDistribution.hpp" 16 | #include "readrecalibration/readDataForErrorPosterior.hpp" 17 | #include "readrecalibration/commonTypes.hpp" 18 | 19 | namespace wecall 20 | { 21 | namespace corrector 22 | { 23 | void floorLowQualityScores( const io::perSampleRegionsReads_t & allReadsInRegion, char qualityFloor, char floorTo ); 24 | 25 | void recalibrateDephasingErrors( const io::perSampleRegionsReads_t & allReadsInRegion, 26 | const wecall::io::FastaFile & fa, 27 | const caller::Region & region, 28 | const ErrorCorrectionParameters & errorCorrectionParameters ); 29 | 30 | void recalibrateReads( const io::perSampleRegionsReads_t & allReadsInRegion, 31 | const wecall::io::FastaFile & fa, 32 | const caller::Region & region, 33 | const ErrorCorrectionParameters & errorCorrectionParameters ); 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /cpp/src/readrecalibration/siteKmerDistribution.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "readrecalibration/siteKmerDistribution.hpp" 3 | 
#include 4 | 5 | namespace wecall 6 | { 7 | namespace corrector 8 | { 9 | SiteKmerDistribution::SiteKmerDistribution( wecall::utils::BasePairSequence paddedKmer ) 10 | { 11 | assert( paddedKmer.size() == kmerSize + 2 * padding ); 12 | 13 | std::copy( paddedKmer.cbegin() + padding, paddedKmer.cend() - padding, m_referenceKmer.data() ); 14 | std::copy( paddedKmer.cbegin(), paddedKmer.cend(), m_extReferenceKmer.data() ); 15 | } 16 | 17 | void SiteKmerDistribution::resetErrorCountData( double priorPerNucProbOfReadTurningIntoErrorState ) 18 | { 19 | double const pseudoCount = 1; // modestly conservative setPrior: pretend to see one well-behaving read 20 | m_errorCountData[0] = {pseudoCount, pseudoCount * priorPerNucProbOfReadTurningIntoErrorState}; 21 | m_errorCountData[1] = {pseudoCount, pseudoCount * priorPerNucProbOfReadTurningIntoErrorState}; 22 | } 23 | 24 | void SiteKmerDistribution::accumulateErrorProbability( double errorProbability, bool isForward ) 25 | { 26 | int fwbwIndex = isForward ? 
0 : 1; 27 | 28 | assert( errorProbability >= -1.0e-10 ); 29 | assert( m_errorCountData[fwbwIndex].errorOpportunity > 0.0 ); 30 | assert( m_errorCountData[fwbwIndex].errorCount > 0.0 ); 31 | 32 | m_errorCountData[fwbwIndex].errorOpportunity += 1; 33 | m_errorCountData[fwbwIndex].errorCount += errorProbability; 34 | } 35 | 36 | void SiteKmerDistribution::updateErrorProbabilities() 37 | { 38 | m_pErrorForward = m_errorCountData[0].errorCount / m_errorCountData[0].errorOpportunity; 39 | m_pErrorBackward = m_errorCountData[1].errorCount / m_errorCountData[1].errorOpportunity; 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /cpp/src/utils/bestScoreSelector.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "common.hpp" 10 | #include "utils/logging.hpp" 11 | #include "utils/bestScoreSelector.hpp" 12 | 13 | namespace wecall 14 | { 15 | namespace utils 16 | { 17 | std::size_t indexOfHighestValue( const std::vector< double > & values ) 18 | { 19 | return long_to_sizet( std::distance( values.cbegin(), std::max_element( values.cbegin(), values.cend() ) ) ); 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /cpp/src/utils/date.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef UTILS_DATE_HPP 3 | #define UTILS_DATE_HPP 4 | 5 | #include "common.hpp" 6 | 7 | #include 8 | 9 | namespace wecall 10 | { 11 | namespace utils 12 | { 13 | /// Utility function to return the current date as ISO8601 formated string 14 | /// TODO: replace this messy C implementation with something nicer, maybe BOOST? 
15 | inline std::string getCurrentDate() 16 | { 17 | char buffer[12]; 18 | 19 | auto time = std::time( nullptr ); 20 | auto tm = std::localtime( &time ); 21 | std::strftime( buffer, 12, "%Y-%m-%d", tm ); 22 | 23 | std::string date = buffer; 24 | return date; 25 | } 26 | } 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /cpp/src/utils/exceptions.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef UTILS_EXCEPTIONS_HPP 3 | #define UTILS_EXCEPTIONS_HPP 4 | 5 | #include 6 | 7 | namespace wecall 8 | { 9 | namespace utils 10 | { 11 | class wecall_exception : public std::runtime_error 12 | { 13 | public: 14 | wecall_exception( const std::string & error ) : std::runtime_error( error ) {} 15 | }; 16 | } 17 | } 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /cpp/src/utils/flatten.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #pragma once 3 | 4 | namespace wecall 5 | { 6 | namespace utils 7 | { 8 | namespace functional 9 | { 10 | 11 | template < template < typename... 
> class R = std::vector, 12 | typename Outer, 13 | typename Inner = typename Outer::value_type > 14 | R< typename Inner::value_type > flatten( const Outer & all ) 15 | { 16 | R< typename Inner::value_type > flattened; 17 | 18 | for ( const auto & inner : all ) 19 | { 20 | flattened.insert( std::end( flattened ), std::begin( inner ), std::end( inner ) ); 21 | } 22 | 23 | return flattened; 24 | } 25 | 26 | } // namespace functional 27 | } // namespace utils 28 | } // namespace wecall 29 | -------------------------------------------------------------------------------- /cpp/src/utils/identity.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #pragma once 3 | 4 | namespace wecall 5 | { 6 | namespace utils 7 | { 8 | namespace functional 9 | { 10 | 11 | template < typename T > 12 | T identity( T t ) 13 | { 14 | return t; 15 | } 16 | 17 | } // namespace functional 18 | } // namespace utils 19 | } // namespace wecall 20 | -------------------------------------------------------------------------------- /cpp/src/utils/indexedProduct.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace wecall 9 | { 10 | namespace utils 11 | { 12 | 13 | template < typename T, typename Iterator > 14 | T indexedProduct( const std::vector< T > & values, const Iterator index_begin, const Iterator index_end, T init ) 15 | { 16 | std::vector< T > intermediate( static_cast< std::size_t >( std::distance( index_begin, index_end ) ) ); 17 | const auto lookup = [&values]( const std::size_t hapIndex ) -> T 18 | { 19 | return values.at( hapIndex ); 20 | }; 21 | std::transform( index_begin, index_end, intermediate.begin(), lookup ); 22 | return std::accumulate( intermediate.cbegin(), intermediate.cend(), init, std::multiplies< T >() ); 23 | } 24 | } 25 | } 
-------------------------------------------------------------------------------- /cpp/src/utils/matrix.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "utils/matrix.hpp" 3 | #include "utils/median.hpp" 4 | 5 | namespace wecall 6 | { 7 | namespace utils 8 | { 9 | void smoothLowOutliers( utils::matrix_t & matrix_t, const double maxDifference ) 10 | { 11 | if ( matrix_t.size1() == 0 or matrix_t.size2() == 0 ) 12 | { 13 | return; 14 | } 15 | 16 | const auto nReads = matrix_t.size1(); 17 | 18 | std::vector< double > maxValuesPerRead; 19 | 20 | for ( std::size_t readIndex = 0; readIndex < nReads; ++readIndex ) 21 | { 22 | const utils::matrixRow_t row_t( matrix_t, readIndex ); 23 | const auto max = *std::max_element( row_t.begin(), row_t.end() ); 24 | maxValuesPerRead.push_back( max ); 25 | } 26 | 27 | const auto median = utils::functional::median( maxValuesPerRead ); 28 | 29 | const double minValue = median * maxDifference; 30 | 31 | for ( auto & val : matrix_t.data() ) 32 | { 33 | val = std::max( val, minValue ); 34 | } 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /cpp/src/utils/matrix.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef MATRIX_HPP 3 | #define MATRIX_HPP 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace wecall 11 | { 12 | namespace utils 13 | { 14 | 15 | using matrix_t = boost::numeric::ublas::matrix< double >; 16 | using matrixRow_t = boost::numeric::ublas::matrix_row< const utils::matrix_t >; 17 | using matrixColumn_t = boost::numeric::ublas::matrix_column< const utils::matrix_t >; 18 | 19 | template < typename MatrixRow > 20 | double sumMatrixRowOverAllIndices( const MatrixRow & matrixRow ) 21 | { 22 | return std::accumulate( matrixRow.begin(), matrixRow.end(), 0.0 ); 23 | } 24 | 25 | 
template < typename MatrixRow, typename IndexSet > 26 | double sumMatrixRowOverIndexSubset( const MatrixRow & matrixRow, const IndexSet & indices ) 27 | { 28 | auto theSum = 0.0; 29 | 30 | for ( const auto index : indices ) 31 | { 32 | theSum += matrixRow( index ); 33 | } 34 | return theSum; 35 | } 36 | 37 | void smoothLowOutliers( utils::matrix_t & matrix_t, const double maxDifference ); 38 | 39 | } // namespace utils 40 | } // namespace wecall 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /cpp/src/utils/median.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #pragma once 3 | 4 | #include "utils/logging.hpp" 5 | 6 | namespace wecall 7 | { 8 | namespace utils 9 | { 10 | namespace functional 11 | { 12 | template < typename Type > 13 | Type median( const std::vector< Type > & values ) 14 | { 15 | std::vector< Type > vecTypes = values; 16 | WECALL_ASSERT( not vecTypes.empty(), "Median cannot be computed of empty list" ); 17 | 18 | std::size_t middleIdx = vecTypes.size() / 2; 19 | 20 | auto target = vecTypes.begin() + middleIdx; 21 | std::nth_element( vecTypes.begin(), target, vecTypes.end() ); 22 | 23 | if ( vecTypes.size() % 2 != 0 ) 24 | { 25 | return *target; 26 | } 27 | else 28 | { 29 | auto targetNeighbour = std::max_element( vecTypes.begin(), target ); 30 | return ( *target + *targetNeighbour ) / 2; 31 | } 32 | } 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /cpp/src/utils/multinomialCoefficients.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "utils/multinomialCoefficients.hpp" 3 | 4 | #include 5 | 6 | unsigned int factorial( unsigned int n ) 7 | { 8 | unsigned int result = 1; 9 | while ( n ) 10 | { 11 | result *= n--; 12 | } 13 | return result; 14 | } 15 | 16 
| unsigned int multinomial_coefficient( std::vector< unsigned int > k ) 17 | { 18 | const unsigned int n = std::accumulate( k.cbegin(), k.cend(), 0u ); 19 | 20 | unsigned int den = 1; 21 | for ( const auto & k_i : k ) 22 | { 23 | den *= factorial( k_i ); 24 | } 25 | 26 | return factorial( n ) / den; 27 | } 28 | -------------------------------------------------------------------------------- /cpp/src/utils/multinomialCoefficients.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef _MULTINOMIALCOEFFICIENTS_H_ 3 | #define _MULTINOMIALCOEFFICIENTS_H_ 4 | #include 5 | #include 6 | 7 | unsigned int factorial( unsigned int n ); 8 | 9 | unsigned int multinomial_coefficient( std::vector< unsigned int > k ); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /cpp/src/utils/partition.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #pragma once 3 | #include 4 | 5 | namespace wecall 6 | { 7 | namespace utils 8 | { 9 | namespace functional 10 | { 11 | 12 | template < typename Input, 13 | typename ComparisonFunction, 14 | template < typename... 
> class R = std::vector, 15 | typename Inner = Input > 16 | R< Inner > partition( const Input & all, const ComparisonFunction & comp ) 17 | { 18 | R< Inner > partitioned; 19 | 20 | if ( not all.empty() ) 21 | { 22 | partitioned.push_back( {*all.begin()} ); 23 | 24 | for ( auto it = all.begin() + 1; it != all.end(); ++it ) 25 | { 26 | if ( comp( partitioned.back().back(), *it ) ) 27 | { 28 | partitioned.back().push_back( *it ); 29 | } 30 | else 31 | { 32 | partitioned.push_back( {*it} ); 33 | } 34 | } 35 | } 36 | 37 | return partitioned; 38 | } 39 | 40 | } // namespace functional 41 | } // namespace utils 42 | } // namespace wecall 43 | -------------------------------------------------------------------------------- /cpp/src/utils/timer.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef TIMER_HPP 3 | #define TIMER_HPP 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace wecall 10 | { 11 | namespace utils 12 | { 13 | std::string encodeString( std::string rawString ); 14 | 15 | class Timer final 16 | { 17 | public: 18 | Timer( std::string type, std::map< std::string, std::string > metadata ); 19 | ~Timer(); 20 | 21 | void start(); 22 | void pause(); 23 | 24 | private: 25 | std::string format_metadata() const; 26 | std::string m_type; 27 | std::map< std::string, std::string > m_metadata; 28 | 29 | std::chrono::steady_clock::time_point m_start; 30 | long m_duration; 31 | }; 32 | 33 | using timerPtr_t = std::shared_ptr< Timer >; 34 | 35 | std::map< std::string, std::string > fileMetaData( std::string filename ); 36 | 37 | class ScopedTimerTrigger 38 | { 39 | public: 40 | ScopedTimerTrigger( timerPtr_t timer ) : m_timer( timer ) { m_timer->start(); } 41 | 42 | ~ScopedTimerTrigger() { m_timer->pause(); } 43 | 44 | private: 45 | timerPtr_t m_timer; 46 | }; 47 | } 48 | } 49 | 50 | #endif 51 | 
--------------------------------------------------------------------------------
/cpp/src/utils/write.hpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
#ifndef UTILS_WRITE_HPP
#define UTILS_WRITE_HPP

#include
#include

#include "common.hpp"

namespace wecall
{
namespace utils
{

/// Formats `value` through a string stream with the stream precision set to 9.
///
/// @param value Any type with a stream insertion operator.
/// @return The streamed text.
template < typename Type >
std::string toString( const Type & value )
{
    std::stringstream ret;
    ret << std::setprecision( 9 ) << value;
    return ret.str();
}
}
}
#endif
--------------------------------------------------------------------------------
/cpp/src/varfilters/variantSoftFilterBank.hpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
// NOTE(review): the guard name omits "SOFT" and so differs from the file name.
#ifndef VARFILTERS_VARIANTFILTERBANK_HPP
#define VARFILTERS_VARIANTFILTERBANK_HPP

#include "varfilters/filter.hpp"
#include "caller/region.hpp"
#include "caller/callSet.hpp"

namespace wecall
{
namespace varfilters
{
using varFilterId_t = std::string;

/// Holds a set of variant filters (m_variantFilters) and applies them as
/// annotations to a call set.
class VariantSoftFilterBank
{
public:
    /// @param varFilterIDs IDs of the filters requested.
    /// The remaining arguments are the thresholds for the individual filters.
    // NOTE(review): presumably only the filters named in varFilterIDs are
    // instantiated -- confirm in the .cpp.
    VariantSoftFilterBank( std::vector< std::string > varFilterIDs,
                           double alleleBiasThreshP,
                           double strandBiasThreshP,
                           double allelePlusStrandBiasThreshP,
                           phred_t minRootMeanSquareMappingQ,
                           double minSNPQOverDepth,
                           double minINDELQOverDepth,
                           phred_t minBadReadsScore,
                           phred_t minCallQual );

    /// Descriptions of the filters, in the form used for VCF FILTER header entries.
    std::vector< vcf::FilterDesc > getFilterDescs() const;

    /// Takes the call set by non-const reference.
    // NOTE(review): presumably annotates each call in place -- confirm in the .cpp.
    void applyFilterAnnotation( caller::callVector_t & callSet );

private:
    // Set ordered by FilterPtrComp.
    std::set< varfilters::FilterPtr_t, FilterPtrComp > m_variantFilters;
    std::vector< vcf::FilterDesc > m_systematicFilterDescs;
};
}
}

#endif // VARFILTERS_VARIANTFILTERBANK_HPP
--------------------------------------------------------------------------------
/cpp/src/variant/haplotypeGenerator.hpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
#ifndef HAPLOTYPE_GENERATOR_HPP
#define HAPLOTYPE_GENERATOR_HPP

#include

#include "variant/haplotypeRanker.hpp"
#include "variant/haplotype.hpp"
#include "io/readRange.hpp"
#include "io/fastaFile.hpp"

namespace wecall
{
namespace variant
{

/// Generates haplotypes from a set of candidate variants over a set of regions,
/// using the reads of each sample.
class AlignmentHaplotypeGenerator
{
public:
    /// @param variants               Candidate variants (copied into m_vars).
    /// @param region                 Regions to work on (copied into m_regions).
    /// @param readsPerSample         Reads per sample. Stored as a REFERENCE, so
    ///                               the argument must outlive this object.
    /// @param referenceSequence      Reference sequence.
    /// @param maxHaplotypesPerRanker Limit on haplotypes (copied into m_maxHaplotypesPerRanker).
    /// @param minReadsToSupportClaim Read-support threshold (copied into m_minReadsToSupportClaim).
    AlignmentHaplotypeGenerator( const std::vector< variant::varPtr_t > & variants,
                                 const caller::SetRegions & region,
                                 const io::perSampleRegionsReads_t & readsPerSample,
                                 utils::referenceSequencePtr_t referenceSequence,
                                 const int64_t maxHaplotypesPerRanker,
                                 const std::size_t minReadsToSupportClaim );

    HaplotypeVector generateHaplotypes() const;

protected:
    HaplotypeVector generateRawHaplotypes() const;

    std::vector< variantSet_t > bestVariantCombos( const VariantCluster & cluster ) const;

private:
    const std::vector< variant::varPtr_t > m_vars;
    const caller::SetRegions m_regions;
    // Reference member: not owned. See the lifetime note on the constructor.
    const io::perSampleRegionsReads_t & m_readsPerSample;
    const utils::referenceSequencePtr_t m_referenceSequence;
    const int64_t m_maxHaplotypesPerRanker;
    const std::size_t m_minReadsToSupportClaim;
};
}
}

#endif
--------------------------------------------------------------------------------
/cpp/src/variant/haplotypeRanker.cpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
#include
#include
#include
#include "utils/bestScoreSelector.hpp"
#include "variant/haplotypeRanker.hpp"
#include "variant/haplotype.hpp"
#include "utils/combinations.hpp"
#include "mapping/hashMapper.hpp"
#include "io/read.hpp"

namespace wecall
{
namespace variant
{
/// Picks the indices of the haplotypes to keep.
///
/// If there are no more than `maxHaplotypes` haplotypes, every index is returned.
/// Otherwise the haplotype frequencies are computed per entry of m_reads and
/// summed over those entries, and the indices chosen by
/// utils::indiciesWithHighestValues from the summed frequencies are returned.
///
/// @param haplotypes    Candidate haplotypes.
/// @param maxHaplotypes Upper bound handed to the selection.
/// @return Set of indices into `haplotypes`.
std::set< std::size_t > AlignmentHaplotypeRanker::getTopHaplotypes( const HaplotypeVector & haplotypes,
                                                                    const uint64_t maxHaplotypes ) const
{
    const auto nHaplotypes = haplotypes.size();

    if ( nHaplotypes <= maxHaplotypes )
    {
        // Few enough candidates: keep all of them.
        std::set< std::size_t > everyIndex;
        for ( std::size_t index = 0; index < nHaplotypes; ++index )
        {
            everyIndex.insert( everyIndex.end(), index );
        }
        return everyIndex;
    }

    std::vector< double > summedFrequencies( nHaplotypes, 0.0 );
    for ( const auto & sampleAndReads : m_reads )
    {
        const auto likelihoods = caller::computeHaplotypeLikelihoods( haplotypes, sampleAndReads.second );
        const auto frequencies = caller::computeHaplotypeFrequencies( likelihoods );

        for ( std::size_t index = 0; index < nHaplotypes; ++index )
        {
            summedFrequencies[index] += frequencies[index];
        }
    }

    const auto selected = utils::indiciesWithHighestValues< double >( summedFrequencies, maxHaplotypes, 1.0, 1.0e5 );

    std::set< std::size_t > topIndices;
    topIndices.insert( selected.cbegin(), selected.cend() );
    return topIndices;
}

//-------------------------------------------------------------------------------------------------
}
}
--------------------------------------------------------------------------------
/cpp/src/variant/haplotypeRanker.hpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
#ifndef HAPLOTYPE_RANKER_HPP
#define HAPLOTYPE_RANKER_HPP

#include
#include
#include
#include

#include "variant/haplotype.hpp"

namespace wecall
{
namespace io
{
    class Read;
}

namespace variant
{
/// Ranks haplotypes by their summed frequencies over the stored reads.
class AlignmentHaplotypeRanker
{
public:
    /// @param reads Per-sample reads; copied into m_reads.
    AlignmentHaplotypeRanker( const io::perSampleRegionsReads_t & reads ) : m_reads( reads ) {}

    /// Indices of the haplotypes to keep, given the limit `maxHaplotypes`.
    std::set< std::size_t > getTopHaplotypes( const HaplotypeVector & haplotypes,
                                              const uint64_t maxHaplotypes ) const;

private:
    io::perSampleRegionsReads_t m_reads;
};
}
}

#endif
--------------------------------------------------------------------------------
/cpp/src/variant/snpFinder.hpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
#ifndef VARIANT_SNPFINDER_HPP
#define VARIANT_SNPFINDER_HPP

#include "variant/variantGenerationData.hpp"
#include "alignment/cigarItems.hpp"
#include "variant/type/variant.hpp"

namespace wecall
{
namespace variant
{
/// Finds SNPs in a segment of a read, using shared variant-generation data.
class SNPFinder
{
public:
    SNPFinder( variantGenerationDataPtr_t variantGenerationData ) : m_varGenData( variantGenerationData ) {}

    /// @param offsets Offsets of the segment.
    /// @param length  Length of the segment.
    /// @return The SNPs found in the segment.
    variantSet_t findSNPsInReadSegment( const alignment::offsetsPtr_t offsets, const int64_t length ) const;

private:
    int64_t refIndexFromReadIndex( const int64_t readIndex, const alignment::offsetsPtr_t offsetsPtr_t ) const;
    int64_t readIndexFromRefIndexInStr( const alignment::offsetsPtr_t offsetsPtr_t ) const;

    variantGenerationDataPtr_t m_varGenData;
};
}
}

#endif
--------------------------------------------------------------------------------
/cpp/src/variant/variantFilter.hpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
#ifndef VARIANT_FILTER_HPP
#define VARIANT_FILTER_HPP

#include "variant/type/variant.hpp"

namespace wecall
{
namespace variant
{
class VariantContainer;

/// VariantFilter class. Takes a variant container and outputs the variants
/// that pass filtering, sorted.
///
class VariantFilter
{

public:
    /// Construct a VariantFilter class
    ///
    /// @param minReads               Stored in m_minReads.
    /// @param minPerSamplePercentage Stored in m_minPerSamplePercentage.
    VariantFilter( const int64_t minReads, const int64_t minPerSamplePercentage )
        : m_minReads( minReads ), m_minPerSamplePercentage( minPerSamplePercentage )
    {
    }

    /// Returns sorted vector of all variants that pass filtering
    ///
    /// @param blockRegion  The region being processed.
    /// @param varContainer The input variants to be filtered and sorted.
    variantSet_t getSortedFilteredVariants( const caller::Region & blockRegion,
                                            const VariantContainer & varContainer ) const;

private:
    /// Returns true if variant has >= minReads supporting it
    bool variantPassesFilters( varPtr_t varPtr, const VariantContainer & varContainer ) const;

    bool variantHasMinReadsAcrossSamples( varPtr_t varPtr, const VariantContainer & varContainer ) const;

    bool variantHasSampleWithMinPercentReadCoverage( varPtr_t varPtr, const VariantContainer & varContainer ) const;

private:
    const int64_t m_minReads;
    const int64_t m_minPerSamplePercentage;
};
}
}

#endif
--------------------------------------------------------------------------------
/cpp/src/variant/variantGenerationData.cpp:
--------------------------------------------------------------------------------
// All content Copyright (C) 2018 Genomics plc
#include "variantGenerationData.hpp"
#include "io/read.hpp"

namespace wecall
{
namespace variant
{
/// Convenience constructor: takes the start position and sequence from `read`
/// and delegates to the three-argument constructor.
VariantGenerationData::VariantGenerationData( utils::referenceSequencePtr_t refSeq, const io::Read & read )
    : VariantGenerationData( refSeq, read.getStartPos(), read.sequence() )
{
}

//-----------------------------------------------------------------------------------------

} // namespace variant
} // namespace wecall
-------------------------------------------------------------------------------- /cpp/src/variant/variantGenerationData.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef VARIANT_GENERATION_DATA_HPP 3 | #define VARIANT_GENERATION_DATA_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "caller/region.hpp" 12 | #include "common.hpp" 13 | 14 | namespace wecall 15 | { 16 | namespace io 17 | { 18 | class Read; 19 | } // namespace io 20 | 21 | namespace variant 22 | { 23 | struct VariantGenerationData 24 | { 25 | explicit VariantGenerationData( utils::referenceSequencePtr_t refSeq, 26 | int64_t readStartPos, 27 | utils::BasePairSequence readSeq ) 28 | : refSeq( refSeq ), readStartPos( readStartPos ), readSeq( readSeq ) 29 | { 30 | } 31 | 32 | VariantGenerationData( utils::referenceSequencePtr_t refSeq, const io::Read & read ); 33 | 34 | utils::referenceSequencePtr_t refSeq; 35 | 36 | const int64_t readStartPos; 37 | const utils::BasePairSequence readSeq; 38 | }; 39 | 40 | using variantGenerationDataPtr_t = std::shared_ptr< const VariantGenerationData >; 41 | 42 | } // namespace variant 43 | } // namespace wecall 44 | 45 | #endif // VARIANT_GENERATION_DATA_HPP 46 | -------------------------------------------------------------------------------- /cpp/src/variant/variantGenerator.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef VARIANT_GENERATOR_HPP 3 | #define VARIANT_GENERATOR_HPP 4 | 5 | #include "caller/region.hpp" 6 | #include "io/readRange.hpp" 7 | #include "variant/type/variant.hpp" 8 | #include "variant/variantContainer.hpp" 9 | #include "utils/referenceSequence.hpp" 10 | 11 | #include 12 | #include 13 | 14 | namespace wecall 15 | { 16 | namespace variant 17 | { 18 | std::vector< varPtr_t > normaliseVariantsOnStrand( const 
std::vector< varPtr_t > & variants, 19 | const utils::ReferenceSequence & m_referenceSequence ); 20 | 21 | std::vector< phred_t > getVariantsReadBaseQualities( int64_t startPos, 22 | const utils::QualitySequence & qualityString, 23 | const std::vector< varPtr_t > & variants, 24 | const std::vector< variant::breakpointPtr_t > & breakpoints ); 25 | 26 | class VariantGenerator 27 | { 28 | public: 29 | VariantGenerator( const utils::referenceSequencePtr_t & refSeq, 30 | const phred_t minBaseQual, 31 | const phred_t minMappingQual ); 32 | 33 | VariantContainer generateVariantsFromReads( io::perSampleRegionsReads_t perSamReadRanges ) const; 34 | 35 | private: 36 | const utils::referenceSequencePtr_t m_referenceSequence; 37 | const phred_t m_minBaseQual; 38 | const phred_t m_minMappingQual; 39 | }; 40 | } 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cpp/src/variant/variantNormalizer.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef VARIANT_NORMALIZER_HPP 3 | #define VARIANT_NORMALIZER_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "utils/referenceSequence.hpp" 10 | #include "utils/NeedlemanWunsch.hpp" 11 | #include "variant/type/variant.hpp" 12 | 13 | namespace wecall 14 | { 15 | namespace variant 16 | { 17 | class VariantNormalizer 18 | { 19 | using scoreMatrix_t = boost::numeric::ublas::matrix< int32_t >; 20 | 21 | using bitType_t = int64_t; 22 | using traceMatrix_t = boost::numeric::ublas::matrix< bitType_t >; 23 | 24 | public: 25 | VariantNormalizer( const utils::referenceSequencePtr_t & referenceSequence ) 26 | : m_referenceSequence( referenceSequence ) 27 | { 28 | } 29 | 30 | variantSet_t getNormalized( const caller::Region & region, 31 | const utils::BasePairSequence & sequence, 32 | const boost::optional< variantSet_t > unnormalizedVariants ) const; 33 | 34 | private: 35 | const 
utils::referenceSequencePtr_t m_referenceSequence; 36 | const uint64_t m_maxMatrixSize = 50000; 37 | }; 38 | } 39 | } 40 | #endif 41 | -------------------------------------------------------------------------------- /cpp/src/vcf/filterDescription.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "vcf/filterDescription.hpp" 3 | 4 | namespace wecall 5 | { 6 | namespace vcf 7 | { 8 | FilterDesc::FilterDesc( const std::string & id, const std::string & description ) 9 | : id( id ), description( description ) 10 | { 11 | } 12 | 13 | std::ostream & operator<<( std::ostream & out, const FilterDesc & filterDesc ) 14 | { 15 | out << ""; 16 | 17 | return out; 18 | } 19 | 20 | bool operator<( const FilterDesc & lhs, const FilterDesc & rhs ) { return ( lhs.id < rhs.id ); } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /cpp/src/vcf/filterDescription.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef VCF_FILTERDESC_HPP 3 | #define VCF_FILTERDESC_HPP 4 | 5 | #include "common.hpp" 6 | 7 | #include 8 | 9 | namespace wecall 10 | { 11 | namespace vcf 12 | { 13 | struct FilterDesc 14 | { 15 | /// Basic constructor from constituent data 16 | /// 17 | /// @param id Filter ID. 18 | /// @param description Filter description. 19 | FilterDesc( const std::string & id, const std::string & description ); 20 | 21 | /// Writes the filter to the output stream in the form of a VCF FILTER header line. 
22 | /// 23 | /// @param out Output stream 24 | /// @param filterDesc Filter to be output 25 | /// @return Output stream 26 | friend std::ostream & operator<<( std::ostream & out, const FilterDesc & filterDesc ); 27 | friend bool operator<( const FilterDesc & lhs, const FilterDesc & rhs ); 28 | 29 | std::string id; 30 | std::string description; 31 | }; 32 | } 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cpp/src/vcf/reader.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "reader.hpp" 11 | 12 | namespace wecall 13 | { 14 | namespace vcf 15 | { 16 | Info parseVCFInfo( const std::string & raw_info ) 17 | { 18 | Info parsed_info; 19 | 20 | std::vector< std::string > fields; 21 | boost::split( fields, raw_info, boost::is_any_of( ";" ) ); 22 | 23 | for ( std::string & field : fields ) 24 | { 25 | std::vector< std::string > parts; 26 | boost::split( parts, field, boost::is_any_of( "=" ) ); 27 | 28 | if ( parts.size() == 1 ) 29 | { 30 | parsed_info.push_back( std::make_pair( parts.front(), std::vector< std::string >{} ) ); 31 | } 32 | else if ( parts.size() == 2 ) 33 | { 34 | std::vector< std::string > value; 35 | boost::split( value, parts.back(), boost::is_any_of( "," ) ); 36 | 37 | const auto notEmpty = []( const std::string & item ) 38 | { 39 | return not item.empty(); 40 | }; 41 | 42 | std::vector< std::string > cleaned_value; 43 | std::copy_if( value.begin(), value.end(), std::back_inserter( cleaned_value ), notEmpty ); 44 | 45 | parsed_info.push_back( std::make_pair( parts.front(), cleaned_value ) ); 46 | } 47 | else 48 | { 49 | WECALL_LOG( DEBUG, "Invalid info field " << field ); 50 | } 51 | } 52 | 53 | return parsed_info; 54 | } 55 | } 56 | } 57 | 
-------------------------------------------------------------------------------- /cpp/src/vcf/reader.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef VCF_READER_HPP 3 | #define VCF_READER_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "vcf/filterDescription.hpp" 12 | #include "vcf/record.hpp" 13 | 14 | #include "utils/timer.hpp" 15 | 16 | struct shouldParseValidVCFFilterHeaderUpperCaseID; 17 | struct shouldParseValidVCFFilterHeaderLowerCaseID; 18 | struct shouldParseValidVCFFilterHeaderDigitsAndPunctuation; 19 | struct shouldRaiseOnINFOHeaderType; 20 | struct shouldRaiseOnInvalidVCFFilterHeader; 21 | struct shouldRaiseOnFORMATHeaderType; 22 | 23 | namespace wecall 24 | { 25 | namespace vcf 26 | { 27 | using vcfMetaInformation_t = std::map< std::string, std::string >; 28 | 29 | Info parseVCFInfo( const std::string & raw_info ); 30 | } 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /cpp/src/version/version.cpp.template: -------------------------------------------------------------------------------- 1 | // Define the implementation of the compiled-in version number 2 | // See: 'http://stackoverflow.com/a/4318642' 3 | 4 | #include "version/version.hpp" 5 | 6 | #ifdef COMMIT 7 | #define GIT_SHA1 COMMIT 8 | #else 9 | #define GIT_SHA1 "@GIT_SHA1@" 10 | #endif 11 | 12 | const char g_GIT_SHA1[] = GIT_SHA1; 13 | const char g_PRODUCT_VERSION[] = "@PRODUCT_VERSION@"; 14 | const char g_BUILD_DATE[] = "@BUILD_DATE@"; 15 | -------------------------------------------------------------------------------- /cpp/src/version/version.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | // Define a global commit hash and product version, for useful provenance information 3 | // See: 
'http://stackoverflow.com/a/4318642' 4 | 5 | extern const char g_GIT_SHA1[]; 6 | extern const char g_PRODUCT_VERSION[]; 7 | extern const char g_BUILD_DATE[]; 8 | -------------------------------------------------------------------------------- /cpp/src/weCall.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "weCallMapAndReduce.hpp" 3 | #include "weCallReduce.hpp" 4 | 5 | int main( const int argc, char * argv[] ) 6 | { 7 | if ( argc > 1 and strncmp( argv[1], "reduce", 7 ) == 0 ) 8 | { 9 | // Shift command-line arguments up by one 10 | char ** newArgv = new char * [argc - 1]; 11 | newArgv[0] = argv[0]; 12 | 13 | for ( std::size_t i = 1; i < static_cast< std::size_t >( argc - 1 ); ++i ) 14 | { 15 | newArgv[i] = argv[i + 1]; 16 | } 17 | 18 | const auto rt = wecall::weCallReduce().processJob( argc - 1, newArgv ); 19 | delete[] newArgv; 20 | return rt; 21 | } 22 | else 23 | { 24 | return wecall::weCallMapAndReduce().processJob( argc, argv ); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /cpp/src/weCallBase.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef WECALL_BASE_HPP 3 | #define WECALL_BASE_HPP 4 | 5 | #include 6 | #include 7 | 8 | namespace wecall 9 | { 10 | class weCallBase 11 | { 12 | public: 13 | weCallBase( std::string programName ) 14 | : m_programName( programName ), 15 | m_programVersion( std::string( g_PRODUCT_VERSION ) ), 16 | m_publicOpts( m_programName + " v" + m_programVersion + " (" + std::string( g_GIT_SHA1, 10 ) + 17 | ") configuration parameters" ) 18 | { 19 | } 20 | 21 | virtual ~weCallBase() {} 22 | 23 | protected: 24 | std::string m_programName; 25 | std::string m_programVersion; 26 | 27 | boost::program_options::options_description m_publicOpts; 28 | }; 29 | } 30 | #endif 31 | 
--------------------------------------------------------------------------------
/cpp/src/weCallMapAndReduce.hpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #ifndef WECALL_MAP_HPP
3 | #define WECALL_MAP_HPP
4 | 
5 | #include
6 | 
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | 
16 | #include "caller/jobReduce.hpp"
17 | #include "common.hpp"
18 | #include "caller/job.hpp"
19 | #include "version/version.hpp"
20 | #include "weCallBase.hpp"
21 | 
22 | namespace wecall
23 | {
24 |     using namespace boost::program_options;
25 |     using namespace io;
26 |     // Driver used when main() is not given the "reduce" sub-command; it receives the full command line.
27 |     class weCallMapAndReduce : public weCallBase
28 |     {
29 |     public:
30 |         weCallMapAndReduce();
31 |         int processJob( int argc, char * argv[] );  // return value is used by main() as the exit status
32 | 
33 |     private:
34 |         void initOptions();
35 | 
36 |     private:
37 |         options_description m_configOpts;
38 |         options_description m_cmdLineOpts;
39 |     };
40 | }
41 | 
42 | #endif
--------------------------------------------------------------------------------
/cpp/src/weCallReduce.hpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #ifndef WECALL_REDUCE_HPP
3 | #define WECALL_REDUCE_HPP
4 | 
5 | #include
6 | 
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | 
14 | #include "caller/jobReduce.hpp"
15 | #include "common.hpp"
16 | #include "caller/job.hpp"
17 | #include "version/version.hpp"
18 | #include "weCallBase.hpp"
19 | // Driver for the "reduce" sub-command selected in main().
20 | namespace wecall
21 | {
22 |     class weCallReduce : public weCallBase
23 |     {
24 |     public:
25 |         weCallReduce();
26 |         // main() passes the command line with the "reduce" token already removed.
27 |         int processJob( int argc, char * argv[] );
28 | 
29 |     private:
30 |         void initOptions();
31 |     };
32 | }
33 | 
34 | #endif
--------------------------------------------------------------------------------
/cpp/test/ioTest/caller/testRegionUtils.cpp: 
-------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #define BOOST_TEST_DYN_LINK 3 | #include 4 | 5 | #include "caller/region.hpp" 6 | #include "caller/regionUtils.hpp" 7 | #include "utils/flatten.hpp" 8 | #include "ioTest/io/ioFixture.hpp" 9 | 10 | using wecall::caller::Region; 11 | using wecall::utils::Interval; 12 | using wecall::caller::parseRegionString; 13 | 14 | BOOST_FIXTURE_TEST_CASE( shouldExtractCorrectRegionsWhenNoneSuppliedOnCmdLine, wecall::test::FastaIndexFileFixture ) 15 | { 16 | auto actualRegions = wecall::utils::functional::flatten( 17 | wecall::caller::DataRegionsBuilder( {}, wecall::io::FastaIndex( indexFilename ) ).build() ); 18 | 19 | // manual checks 20 | BOOST_CHECK_EQUAL( actualRegions.size(), 25 ); 21 | BOOST_CHECK_EQUAL( actualRegions[0], Region( "1", 0, 249250621 ) ); 22 | 23 | // automatic checks 24 | std::vector< Region > expectedRegions; 25 | for ( auto contigStr : fastaIndices[0]->standardContigs() ) 26 | { 27 | expectedRegions.emplace_back( contigStr, fastaIndices[0]->contigs().at( contigStr ) ); 28 | } 29 | BOOST_CHECK_EQUAL_COLLECTIONS( expectedRegions.begin(), expectedRegions.end(), actualRegions.begin(), 30 | actualRegions.end() ); 31 | } 32 | 33 | BOOST_FIXTURE_TEST_CASE( shlouldExtractCorrectRegionsFromAList, wecall::test::FastaIndexFileFixture ) 34 | { 35 | auto actualRegions = wecall::utils::functional::flatten( 36 | wecall::caller::DataRegionsBuilder( {"1:2-3", "2:100-200"}, wecall::io::FastaIndex( indexFilename ) ) 37 | .build() ); 38 | std::vector< Region > expectedRegions = {Region( "1", 2, 3 ), Region( "2", 100, 200 )}; 39 | 40 | BOOST_CHECK_EQUAL_COLLECTIONS( expectedRegions.begin(), expectedRegions.end(), actualRegions.begin(), 41 | actualRegions.end() ); 42 | } -------------------------------------------------------------------------------- /cpp/test/ioTest/io/testReadDataset.cpp: 
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #include "io/readDataSet.hpp"
3 | 
4 | #define BOOST_TEST_DYN_LINK
5 | #include
6 | 
7 | #include
8 | 
9 | #include "ioFixture.hpp"
10 | #include "caller/params.hpp"
11 | #include "caller/params.hpp"
12 | #include "io/readDataSet.hpp"
13 | 
14 | using wecall::io::ReadDataset;
15 | // A freshly constructed ReadDataset must be empty yet still expose every sample it was given.
16 | BOOST_AUTO_TEST_CASE( testReadDataSetInitialisation )
17 | {
18 |     std::vector< std::string > samples = {"NA12878", "NA12891"};
19 |     ReadDataset readDataSet( samples, wecall::caller::Region( "1", 0, 0 ) );
20 |     BOOST_CHECK( readDataSet.isEmpty() );
21 |     std::vector< std::string > setSamples = readDataSet.getSampleNames();
22 |     BOOST_CHECK_EQUAL_COLLECTIONS( setSamples.begin(), setSamples.end(), samples.begin(), samples.end() );
23 |     auto readRange_NA12878 = readDataSet.getAllReads( 0 ).at( "NA12878" );
24 |     BOOST_CHECK_EQUAL( std::distance( readRange_NA12878.begin(), readRange_NA12878.end() ), 0 ); // has 0 reads
25 | 
26 |     auto readRange_NA12891 = readDataSet.getAllReads( 0 ).at( "NA12891" );
27 |     BOOST_CHECK_EQUAL( std::distance( readRange_NA12891.begin(), readRange_NA12891.end() ), 0 ); // has 0 reads
28 | }
29 | 
--------------------------------------------------------------------------------
/cpp/test/ioTest/ioTest.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | // test_main.cpp
3 | #define BOOST_TEST_DYN_LINK
4 | #define BOOST_TEST_MODULE SmallVariantCaller
5 | #include
6 | #include
7 | #include
8 | #include "caller/params.hpp"
9 | #include "utils/logging.hpp"
10 | #include "ioTest/utils/environment.hpp"
11 | // Global fixture for the ioTest module: initialises logging to iotest-cpp.log at DEBUG level.
12 | struct LoggingGlobalFixture
13 | {
14 |     LoggingGlobalFixture()
15 |     {
16 |         std::string logFile = "iotest-cpp.log";
17 |         std::cout << "Initialising log to file " << logFile << std::endl;
18 |         wecall::caller::params::Logging loggingParams( 
loggingLevel::DEBUG, logFile, false, -1, false );
19 |         wecall::utils::initialiseLog( loggingParams.m_logLevel, loggingParams.m_logFilename, loggingParams.m_quietMode,
20 |                                       loggingParams.m_verbosity, loggingParams.m_logTimings );
21 |     }
22 | };
23 | 
24 | BOOST_GLOBAL_FIXTURE( LoggingGlobalFixture );
25 | 
--------------------------------------------------------------------------------
/cpp/test/ioTest/utils/environment.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #include
3 | #include "utils/exceptions.hpp"
4 | // requireEnv: returns the value of the named environment variable; throws wecall_exception if it is unset.
5 | namespace wecall
6 | {
7 | namespace test
8 | {
9 |     std::string requireEnv( const char * variableName )
10 |     {
11 |         char * value = std::getenv( variableName );  // nullptr when the variable is not defined
12 |         if ( value == nullptr )
13 |         {
14 |             throw utils::wecall_exception( std::string( "Undefined environment variable: " ) + variableName );
15 |         }
16 |         else
17 |         {
18 |             return std::string( value );
19 |         }
20 |     }
21 | }
22 | }
23 | 
--------------------------------------------------------------------------------
/cpp/test/ioTest/utils/environment.hpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #include
3 | // Test helper declared here and defined in environment.cpp.
4 | namespace wecall
5 | {
6 | namespace test
7 | {
8 |     std::string requireEnv( const char * variableName );  // throws wecall_exception when the variable is unset
9 | }
10 | }
11 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/assembly/testNode.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | #include
4 | 
5 | #include
6 | 
7 | #include "assembly/node.hpp"
8 | #include "common.hpp"
9 | 
10 | using wecall::assembly::Node;
11 | // A node with no edges attached is expected to be a terminal branch node, not a regular one.
12 | BOOST_AUTO_TEST_CASE( testIsolatedNode )
13 | {
14 |     auto node = std::make_shared< Node >( 'A', 'C' );
15 | 
16 |     BOOST_CHECK_EQUAL( node->lastCharacter(), 'C' );
17 | 
    BOOST_CHECK( not node->isRegular() );
18 |     BOOST_CHECK( node->isBranch() );
19 |     BOOST_CHECK( node->isTerminal() );
20 | }
21 | 
22 | BOOST_AUTO_TEST_CASE( testRegularNode )
23 | {
24 |     auto node = std::make_shared< Node >( 'A', 'C' );
25 |     auto inNode = std::make_shared< Node >( 'G', 'T' );
26 |     auto outNode = std::make_shared< Node >( 'T', 'G' );
27 |     // Exactly one incoming and one outgoing edge: the node should now report itself as regular.
28 |     node->addInEdge( inNode );
29 |     node->addOutEdge( outNode, 0 );
30 | 
31 |     BOOST_CHECK( node->isRegular() );
32 |     BOOST_CHECK( not node->isBranch() );
33 |     BOOST_CHECK( not node->isTerminal() );
34 |     BOOST_CHECK_EQUAL( ( *node->begin() ).lastCharacter(), 'G' );
35 | }
36 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/caller/testCandidateVariantBank.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | #include
4 | 
5 | #include "variant/type/variant.hpp"
6 | #include "caller/candidateVariantBank.hpp"
7 | #include "unittest/vcf/VCFTestUtils.hpp"
8 | 
9 | using namespace wecall::variant;
10 | using namespace wecall::caller;
11 | using wecall::caller::Region;
12 | using wecall::utils::ReferenceSequence;
13 | 
14 | BOOST_AUTO_TEST_CASE( shouldComputePriorFromAlleleFrequenceString )
15 | {
16 |     wecall::vcf::Info info;
17 | 
18 |     info.emplace_back( std::make_pair< std::string, std::vector< std::string > >(
19 |         std::string( "AF" ), {std::string( "0.4" ), std::string( "0.5" )} ) );
20 |     info.emplace_back( std::make_pair< std::string, std::vector< std::string > >(
21 |         std::string( "DP" ), {std::string( "0.4" ), std::string( "0.1" )} ) );
22 |     // The expected priors are the AF values; the DP entry must not contribute.
23 |     const auto expectedResults = {0.4, 0.5};
24 |     const auto results = getPriorsFromInfo( info );
25 | 
26 |     BOOST_CHECK_EQUAL_COLLECTIONS( expectedResults.begin(), expectedResults.end(), results.begin(), results.end() );
27 | }
28 | 
29 | 
//------------------------------------------------------------------------------------------------- 30 | -------------------------------------------------------------------------------- /cpp/test/unittest/readrecalibration/testCommonTypes.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #define BOOST_TEST_DYN_LINK 3 | #include 4 | #include "readrecalibration/commonTypes.hpp" 5 | #include "stats/functions.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE( testPhredToPCache ) 8 | { 9 | for ( auto i = 0; i < 100; ++i ) 10 | { 11 | auto cacheNumber = wecall::corrector::phred_to_p( i ); 12 | auto computedNumber = wecall::stats::fromPhredQ( i ); 13 | BOOST_CHECK_CLOSE( cacheNumber, computedNumber, 1e-8 ); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /cpp/test/unittest/readrecalibration/testSiteKmerDistribution.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "io/fastaFile.hpp" 3 | #include "readrecalibration/siteKmerDistribution.hpp" 4 | #include "readrecalibration/commonTypes.hpp" 5 | 6 | #define BOOST_TEST_DYN_LINK 7 | #include 8 | 9 | BOOST_AUTO_TEST_CASE( shouldConstructReferenceStringFromPaddedReference ) 10 | { 11 | // Given 12 | // 123456789 13 | std::string paddedReference = "ATCGCTCTG"; 14 | BOOST_CHECK_EQUAL( paddedReference.size(), 2 * wecall::corrector::padding + wecall::corrector::kmerSize ); 15 | 16 | // When 17 | wecall::corrector::SiteKmerDistribution siteKmerDistribution( paddedReference ); 18 | 19 | // Then 20 | BOOST_CHECK_EQUAL( wecall::corrector::show_string( siteKmerDistribution.getReferenceKmer() ), 21 | paddedReference.substr( 1, paddedReference.size() - 2 ) ); 22 | BOOST_CHECK_EQUAL( wecall::corrector::show_string( siteKmerDistribution.getExtReferenceKmer() ), paddedReference ); 23 | } 24 | 25 | BOOST_AUTO_TEST_CASE( 
shouldComputePErrorAsPriorOnBeingReset )
26 | {
27 |     std::string paddedReference = "ATCGCTCTG";
28 |     wecall::corrector::SiteKmerDistribution siteKmerDistribution( paddedReference );
29 |     auto priorPerNucProbOfReadTurningIntoErrorState = 0.661238;
30 | 
31 |     siteKmerDistribution.resetErrorCountData( priorPerNucProbOfReadTurningIntoErrorState );
32 |     siteKmerDistribution.updateErrorProbabilities();
33 |     // Nothing is added between the reset and the update, so both pError values should equal the prior.
34 |     BOOST_CHECK_CLOSE( siteKmerDistribution.pError( true ), priorPerNucProbOfReadTurningIntoErrorState, 1e-8 );
35 |     BOOST_CHECK_CLOSE( siteKmerDistribution.pError( false ), priorPerNucProbOfReadTurningIntoErrorState, 1e-8 );
36 | }
37 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/unittest.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | // test_main.cpp
3 | #define BOOST_TEST_DYN_LINK
4 | #define BOOST_TEST_MODULE SmallVariantCaller
5 | #include
6 | #include
7 | #include
8 | #include "caller/params.hpp"
9 | #include "utils/logging.hpp"
10 | // Global fixture for the unit-test module: initialises logging to unittest-cpp.log at DEBUG level.
11 | struct LoggingGlobalFixture
12 | {
13 |     LoggingGlobalFixture()
14 |     {
15 |         std::string logFile = "unittest-cpp.log";
16 |         std::cout << "Initialising log to file " << logFile << std::endl;
17 |         wecall::caller::params::Logging loggingParams( loggingLevel::DEBUG, logFile, false, -1, false );
18 |         wecall::utils::initialiseLog( loggingParams.m_logLevel, loggingParams.m_logFilename, loggingParams.m_quietMode,
19 |                                       loggingParams.m_verbosity, loggingParams.m_logTimings );
20 |     }
21 | };
22 | 
23 | BOOST_GLOBAL_FIXTURE( LoggingGlobalFixture );
24 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/utils/testBestScoreSelector.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | 
4 | #include
5 | 
6 | #include "utils/bestScoreSelector.hpp"
7 | 
8 | #include
9 | // indiciesWithHighestValues( scores, maxCount, ... ) returns indices into `scores`, best score first.
10 | BOOST_AUTO_TEST_CASE( testWithEmptySetOfScores )
11 | {
12 |     std::vector< double > scores = {};
13 |     auto bestScores = wecall::utils::indiciesWithHighestValues( scores, 100 );
14 |     BOOST_CHECK_EQUAL( bestScores.size(), 0 );
15 | }
16 | 
17 | BOOST_AUTO_TEST_CASE( testGetsAllIndiciesIfTotalAllowedIsHigh )
18 | {
19 |     std::vector< double > scores = {1.0, 2.0};
20 |     auto bestScores = wecall::utils::indiciesWithHighestValues( scores, 2 );
21 |     BOOST_CHECK_EQUAL( bestScores.size(), 2 );
22 | 
23 |     BOOST_CHECK_EQUAL( bestScores[0], 1 ); // In reverse order as 2.0 > 1.0
24 |     BOOST_CHECK_EQUAL( bestScores[1], 0 );
25 | }
26 | 
27 | BOOST_AUTO_TEST_CASE( testGetsOnlyTheBestScore )
28 | {
29 |     std::vector< double > scores = {1.0, 2.0};
30 |     auto bestScores = wecall::utils::indiciesWithHighestValues( scores, 1 );
31 |     BOOST_CHECK_EQUAL( bestScores.size(), 1 );
32 | 
33 |     BOOST_CHECK_EQUAL( bestScores[0], 1 );
34 | }
35 | 
36 | BOOST_AUTO_TEST_CASE( testShouldPickOutTheTwoHighestScores )
37 | {
38 |     std::vector< double > scores = {2.0, 1.0, 2.0};
39 |     auto bestScores = wecall::utils::indiciesWithHighestValues( scores, 2 );
40 |     BOOST_CHECK_EQUAL( bestScores.size(), 2 );
41 |     // Checked through the scores rather than the indices, so either tied index order is accepted.
42 |     BOOST_CHECK_EQUAL( scores[bestScores[0]], 2.0 );
43 |     BOOST_CHECK_EQUAL( scores[bestScores[1]], 2.0 );
44 | }
45 | 
46 | BOOST_AUTO_TEST_CASE( testShouldNotPickOutLowScores )
47 | {
48 |     std::vector< double > scores = {1.0e8, 1.0 + 1e-9, 1.0};
49 |     const auto bestScores = wecall::utils::indiciesWithHighestValues( scores, scores.size(), 1.0, 1.0e8 );
50 |     BOOST_CHECK_EQUAL( bestScores.size(), 2 );
51 |     BOOST_CHECK_EQUAL( scores[bestScores[0]], 1e8 );
52 |     BOOST_CHECK_EQUAL( scores[bestScores[1]], 1.0 + 1e-9 );
53 | }
54 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/utils/testFactorial.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright 
(C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | 
4 | #include
5 | 
6 | #include "utils/multinomialCoefficients.hpp"
7 | // Spot checks of factorial() for the arguments 0 to 3.
8 | BOOST_AUTO_TEST_CASE( factorial_0 ) { BOOST_CHECK_EQUAL( 1, factorial( 0 ) ); }
9 | 
10 | BOOST_AUTO_TEST_CASE( factorial_1 ) { BOOST_CHECK_EQUAL( 1, factorial( 1 ) ); }
11 | 
12 | BOOST_AUTO_TEST_CASE( factorial_2 ) { BOOST_CHECK_EQUAL( 2, factorial( 2 ) ); }
13 | 
14 | BOOST_AUTO_TEST_CASE( factorial_3 ) { BOOST_CHECK_EQUAL( 6, factorial( 3 ) ); }
15 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/utils/testInterval.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | #include
4 | 
5 | #include "utils/interval.hpp"
6 | 
7 | using wecall::utils::Interval;
8 | // Overlap cases below all involve an empty interval (start == end).
9 | BOOST_AUTO_TEST_CASE( emptyIntervalsShouldOverlapEachOther )
10 | {
11 |     BOOST_CHECK( Interval( 1, 1 ).overlaps( Interval( 1, 1 ) ) );
12 | }
13 | 
14 | BOOST_AUTO_TEST_CASE( emptyIntervalDoesNotOverlapTouchingIntervalOnLeft )
15 | {
16 |     BOOST_CHECK( not Interval( 1, 2 ).overlaps( Interval( 2, 2 ) ) );
17 | }
18 | 
19 | BOOST_AUTO_TEST_CASE( emptyIntervalDoesNotOverlapTouchingIntervalOnRight )
20 | {
21 |     BOOST_CHECK( not Interval( 1, 1 ).overlaps( Interval( 1, 2 ) ) );
22 | }
23 | // combine() cases: the result is expected to span from the smaller start to the larger end.
24 | BOOST_AUTO_TEST_CASE( shouldCombineNonoverlappingSortedIntervals )
25 | {
26 |     Interval intvl1( 1, 3 );
27 |     Interval intvl2( 5, 10 );
28 | 
29 |     Interval expectedIntvl( 1, 10 );
30 |     intvl1.combine( intvl2 );
31 | 
32 |     BOOST_CHECK_EQUAL( intvl1, expectedIntvl );
33 | }
34 | 
35 | BOOST_AUTO_TEST_CASE( shouldCombineNonoverlappingNonsortedIntervals )
36 | {
37 |     Interval intvl1( 5, 10 );
38 |     Interval intvl2( 1, 3 );
39 | 
40 |     Interval expectedIntvl( 1, 10 );
41 |     intvl1.combine( intvl2 );
42 | 
43 |     BOOST_CHECK_EQUAL( intvl1, expectedIntvl );
44 | }
45 | 
46 | BOOST_AUTO_TEST_CASE( shouldCombineOverlappingSortedIntervals )
47 | {
48 
|     Interval intvl1( 1, 8 );
49 |     Interval intvl2( 5, 10 );
50 | 
51 |     Interval expectedIntvl( 1, 10 );
52 |     intvl1.combine( intvl2 );
53 | 
54 |     BOOST_CHECK_EQUAL( intvl1, expectedIntvl );
55 | }
56 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/utils/testMatrix.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | #include
4 | 
5 | #include "utils/matrix.hpp"
6 | #include
7 | #include
8 | #include
9 | // Test helper: builds a matrix_t from a non-empty list of equally sized rows (both conditions are asserted).
10 | wecall::utils::matrix_t getMatrixFromVecOfVecs( const std::vector< std::vector< double > > & values )
11 | {
12 |     assert( values.size() > 0 );
13 | 
14 |     wecall::utils::matrix_t matrix_t( values.size(), values[0].size() );
15 | 
16 |     for ( std::size_t rowIndex = 0; rowIndex < matrix_t.size1(); ++rowIndex )
17 |     {
18 |         const auto & row = values[rowIndex];
19 |         assert( row.size() == matrix_t.size2() );
20 |         // Each row is copied into the flat data() storage at offset size2() * rowIndex.
21 |         std::copy( row.begin(), row.end(), matrix_t.data().begin() + matrix_t.size2() * rowIndex );
22 |     }
23 |     return matrix_t;
24 | }
25 | 
26 | BOOST_AUTO_TEST_CASE( testSumOverMatrixRowIndexSet )
27 | {
28 |     std::vector< int > values = {1, 2, 3, 4, 5, 6, 7};
29 |     wecall::utils::matrix_t matrix_t( 1, 7 );
30 |     std::copy( values.begin(), values.end(), matrix_t.data().begin() );
31 | 
32 |     wecall::utils::matrixRow_t matrixRow_t( matrix_t, 0 );
33 |     // Indices 0, 2 and 4 select the values 1, 3 and 5.
34 |     std::set< std::size_t > indicies = {0, 2, 4};
35 |     BOOST_CHECK_EQUAL( wecall::utils::sumMatrixRowOverIndexSubset( matrixRow_t, indicies ), 1 + 3 + 5 );
36 |     BOOST_CHECK_EQUAL( wecall::utils::sumMatrixRowOverAllIndices( matrixRow_t ), 1 + 2 + 3 + 4 + 5 + 6 + 7 );
37 | }
38 | 
39 | BOOST_AUTO_TEST_CASE( testAdjustmentToMedian )
40 | {
41 |     std::vector< std::vector< double > > values = {
42 |         {13.0}, {1.0}, {1.0}, {1.0e-6}, {1.0}, {1.0}, {1.0}, {15.0},
43 |     };
44 | 
45 |     wecall::utils::matrix_t matrix = getMatrixFromVecOfVecs( values );
46 | 
47 | 
    BOOST_CHECK_CLOSE( *std::min_element( matrix.data().begin(), matrix.data().end() ), 1.0e-6, 1.0 );
48 |     // After smoothing with 1.0e-4 the smallest entry is expected to have risen from 1.0e-6 to 1.0e-4.
49 |     wecall::utils::smoothLowOutliers( matrix, 1.0e-4 );
50 | 
51 |     BOOST_CHECK_CLOSE( *std::min_element( matrix.data().begin(), matrix.data().end() ), 1.0e-4, 1.0 );
52 | }
--------------------------------------------------------------------------------
/cpp/test/unittest/utils/testMedian.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | #include
4 | 
5 | #include "utils/median.hpp"
6 | // median() of an empty input is expected to throw wecall_exception.
7 | BOOST_AUTO_TEST_CASE( testMedianThrowsWithEmptyList )
8 | {
9 |     std::vector< double > input = {};
10 |     BOOST_CHECK_THROW( wecall::utils::functional::median( input ), wecall::utils::wecall_exception );
11 | }
12 | 
13 | BOOST_AUTO_TEST_CASE( testMedianWithOneElement )
14 | {
15 |     std::vector< double > input = {1.0};
16 |     const auto expectedResult = 1.0;
17 |     BOOST_CHECK_EQUAL( expectedResult, wecall::utils::functional::median( input ) );
18 | }
19 | // With an even number of elements the expected median is the mean of the two middle values.
20 | BOOST_AUTO_TEST_CASE( testMedianWithTwoElements )
21 | {
22 |     std::vector< double > input = {3.0, 1.0};
23 |     const auto expectedResult = 2.0;
24 |     BOOST_CHECK_EQUAL( expectedResult, wecall::utils::functional::median( input ) );
25 | }
26 | 
27 | BOOST_AUTO_TEST_CASE( testMedianWithTwoIntegerElements )
28 | {
29 |     std::vector< int > input = {2, 1};
30 |     const auto expectedResult = 1; // Mean of 2 & 1 is 1 for integers. 
31 |     BOOST_CHECK_EQUAL( expectedResult, wecall::utils::functional::median( input ) );
32 | }
33 | // Unsorted input of ten values 1..10: the expected median is 5.5.
34 | BOOST_AUTO_TEST_CASE( testMedianWithManyElements )
35 | {
36 |     std::vector< double > input = {8, 5, 9, 10, 1, 3, 4, 6, 7, 2};
37 |     const auto expectedResult = 5.5;
38 |     BOOST_CHECK_EQUAL( expectedResult, wecall::utils::functional::median( input ) );
39 | }
40 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/utils/testWrite.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | #include
4 | 
5 | #include "utils/write.hpp"
6 | // toString() is expected to print doubles without a trailing ".0" or trailing zeroes.
7 | BOOST_AUTO_TEST_CASE( testToStringForDoubleWithNoDecimalPart )
8 | {
9 |     double value = 23.0;
10 |     BOOST_CHECK_EQUAL( "23", wecall::utils::toString( value ) );
11 | }
12 | 
13 | BOOST_AUTO_TEST_CASE( testToStringForDoubleWithDecimalPart )
14 | {
15 |     double value = 23.1;
16 |     BOOST_CHECK_EQUAL( "23.1", wecall::utils::toString( value ) );
17 | }
18 | 
19 | BOOST_AUTO_TEST_CASE( testToStringForDoubleWithLongDecimalPart )
20 | {
21 |     double value = 23.1234567;
22 |     BOOST_CHECK_EQUAL( "23.1234567", wecall::utils::toString( value ) );
23 | }
24 | 
25 | BOOST_AUTO_TEST_CASE( testToStringForDoubleWithOnlyDecimalPart )
26 | {
27 |     double value = 0.0001;
28 |     BOOST_CHECK_EQUAL( "0.0001", wecall::utils::toString( value ) );
29 | }
30 | 
31 | BOOST_AUTO_TEST_CASE( testToStringForDoubleWithTrailingZeroes )
32 | {
33 |     double value = 0.01000;
34 |     BOOST_CHECK_EQUAL( "0.01", wecall::utils::toString( value ) );
35 | }
36 | // A phred_t value is expected to print as a number ("23").
37 | BOOST_AUTO_TEST_CASE( testToStringForPhred )
38 | {
39 |     phred_t value = 23;
40 |     BOOST_CHECK_EQUAL( "23", wecall::utils::toString( value ) );
41 | }
42 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/varfilters/testVariantSoftFilterBank.cpp: 
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #define BOOST_TEST_DYN_LINK
3 | 
4 | #include
5 | #include
6 | 
7 | #include "varfilters/variantSoftFilterBank.hpp"
8 | #include "utils/exceptions.hpp"
9 | #include "vcf/filterDescription.hpp"
10 | 
11 | using VariantSoftFilterBank = wecall::varfilters::VariantSoftFilterBank;
12 | using wecall_exception = wecall::utils::wecall_exception;
13 | // "AB" and "SB" are accepted filter IDs; construction with them must not throw.
14 | BOOST_AUTO_TEST_CASE( shouldConstructValidSoftFilterBank )
15 | {
16 |     std::vector< std::string > filters( {"AB", "SB"} );
17 |     BOOST_CHECK_NO_THROW( VariantSoftFilterBank vsfb( filters, 0.01, 0.01, 0, 0, 0.0, 0.0, 0, 0 ) );
18 | }
19 | // An unrecognised filter ID ("ABBA") must be rejected with wecall_exception.
20 | BOOST_AUTO_TEST_CASE( shouldRejectInvalidSoftFilter )
21 | {
22 |     std::vector< std::string > filters( {"ABBA"} );
23 |     BOOST_CHECK_THROW( VariantSoftFilterBank vsfb( filters, 0.01, 0.01, 0, 0, 0.0, 0.0, 0, 0 ), wecall_exception );
24 | }
25 | 
26 | BOOST_AUTO_TEST_CASE( shouldReturnFilterDescsInADeterministicOrder )
27 | {
28 |     // Note - Currently that deterministic order is lexicographically by filter ID
29 |     std::vector< std::string > filters( {"SB", "AB"} );
30 |     VariantSoftFilterBank vsfb( filters, 0.01, 0.01, 0, 0, 0.0, 0.0, 0, 0 );
31 |     auto filterDescs = vsfb.getFilterDescs();
32 |     BOOST_CHECK_EQUAL( filterDescs.size(), 2 );
33 |     BOOST_CHECK_EQUAL( filterDescs[0].id, "AB" );
34 |     BOOST_CHECK_EQUAL( filterDescs[1].id, "SB" );
35 | }
36 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/variant/testMnp.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #include "variant/type/variant.hpp"
3 | 
4 | #define BOOST_TEST_DYN_LINK
5 | #include
6 | #include
7 | 
8 | using wecall::utils::ReferenceSequence;
9 | using wecall::caller::Region;
10 | // Builds a 4-base MNP ("ABCD" -> "HELL") at 1:1000000 and checks its accessors and default prior.
11 | BOOST_AUTO_TEST_CASE( mnp )
12 | {
13 |     using namespace wecall::variant; 
14 | 
15 |     std::string contig = "1";
16 |     const std::string removed( "ABCD" );
17 |     const std::string added( "HELL" );
18 | 
19 |     const auto referenceSequence =
20 |         std::make_shared< ReferenceSequence >( Region( contig, 1000000, 1000000 + removed.size() ), removed );
21 |     varPtr_t theMNP = std::make_shared< Variant >( referenceSequence, referenceSequence->region(), added );
22 | 
23 |     BOOST_CHECK_EQUAL( theMNP->sequence(), added );
24 |     BOOST_CHECK_EQUAL( theMNP->refSequence().sequence(), removed );
25 | 
26 |     BOOST_CHECK_EQUAL( theMNP->start(), 1000000 );
27 |     BOOST_CHECK_EQUAL( theMNP->end(), 1000004 );
28 |     // All four bases differ between "ABCD" and "HELL".
29 |     auto nDiffs = 4;
30 |     auto expectedPrior = 5e-5 * pow( 0.1, nDiffs - 1 ) * ( 1.0 - 0.1 );
31 | 
32 |     wecall::variant::setDefaultPriors( {theMNP} );
33 |     BOOST_CHECK_CLOSE( theMNP->prior(), expectedPrior, 1e-5 );
34 | }
35 | 
36 | BOOST_AUTO_TEST_CASE( testMnpLeftAlignDoesntChangePosition )
37 | {
38 |     using namespace wecall::variant;
39 | 
40 |     std::string contig = "1";
41 |     const std::string seq = "TT";
42 | 
43 |     const auto referenceSequence = std::make_shared< ReferenceSequence >( Region( contig, 0, 10 ), "ATATATATAA" );
44 | 
45 |     varPtr_t theMnp = std::make_shared< Variant >( referenceSequence, Region( contig, 8, 10 ), seq );
46 |     // The return value is ignored; the test only checks that theMnp itself still starts at 8.
47 |     theMnp->getLeftAligned( 0 );
48 | 
49 |     BOOST_CHECK_EQUAL( theMnp->start(), 8 );
50 | }
51 | 
--------------------------------------------------------------------------------
/cpp/test/unittest/variant/testSnp.cpp:
--------------------------------------------------------------------------------
1 | // All content Copyright (C) 2018 Genomics plc
2 | #include "variant/type/variant.hpp"
3 | 
4 | #define BOOST_TEST_DYN_LINK
5 | #include
6 | #include
7 | 
8 | using wecall::caller::Region;
9 | using wecall::utils::ReferenceSequence;
10 | using wecall::variant::Variant;
11 | using wecall::variant::varPtr_t;
12 | 
13 | //-------------------------------------------------------------------------------------------------
14 | 
15 | 
BOOST_AUTO_TEST_CASE( testSNP ) 16 | { 17 | std::string contig = "1"; 18 | 19 | auto referenceSequence = std::make_shared< ReferenceSequence >( Region( contig, 1000000, 1000001 ), "A" ); 20 | varPtr_t theSnp = std::make_shared< Variant >( referenceSequence, referenceSequence->region(), "T" ); 21 | 22 | BOOST_CHECK_EQUAL( theSnp->region(), Region( contig, 1000000, 1000001 ) ); 23 | BOOST_CHECK_EQUAL( theSnp->refSequence().sequence(), "A" ); 24 | BOOST_CHECK_EQUAL( theSnp->sequence(), "T" ); 25 | BOOST_CHECK_EQUAL( theSnp->start(), 1000000 ); 26 | BOOST_CHECK_EQUAL( theSnp->end(), 1000001 ); 27 | 28 | auto expectedPrior = 1e-3 / 3.0; 29 | 30 | wecall::variant::setDefaultPriors( {theSnp} ); 31 | BOOST_CHECK_CLOSE( theSnp->prior(), expectedPrior, 1e-5 ); 32 | } 33 | 34 | BOOST_AUTO_TEST_CASE( testSnpLeftAlignDoesntChangePosition ) 35 | { 36 | auto referenceSequence = std::make_shared< ReferenceSequence >( Region( "1", 0, 9 ), "ATATATATA" ); 37 | varPtr_t theSnp = std::make_shared< Variant >( referenceSequence, Region( "1", 8, 9 ), "T" ); 38 | theSnp->getLeftAligned( 0 ); 39 | BOOST_CHECK_EQUAL( theSnp->start(), 8 ); 40 | } 41 | -------------------------------------------------------------------------------- /cpp/test/unittest/vcf/VCFTestUtils.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #include "VCFTestUtils.hpp" 3 | #include 4 | #include 5 | #include 6 | 7 | boost::test_tools::predicate_result checkVariantInVector( const std::vector< varPtr_t > & variants, 8 | const varPtr_t & testVariant ) 9 | { 10 | auto comparison_func = [&testVariant]( const varPtr_t & v ) 11 | { 12 | 13 | return std::type_index( typeid( *testVariant ) ) == std::type_index( typeid( *v ) ) and *v == *testVariant; 14 | }; 15 | 16 | const auto it = std::find_if( variants.cbegin(), variants.cend(), comparison_func ); 17 | boost::test_tools::predicate_result res( false ); 18 | res.message() << "\nVariant " 
<< testVariant->toString().c_str() << " not found in the set:\n{\n"; 19 | for ( auto & var : variants ) 20 | { 21 | res.message() << "\t" << var->toString().c_str() << "\n"; 22 | } 23 | res.message() << "}"; 24 | 25 | if ( it == variants.end() ) 26 | { 27 | return res; 28 | } 29 | // else 30 | return true; 31 | } 32 | -------------------------------------------------------------------------------- /cpp/test/unittest/vcf/VCFTestUtils.hpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #ifndef VCF_VCFTESTUTILS_HPP 3 | #define VCF_VCFTESTUTILS_HPP 4 | 5 | #include 6 | #include "vcf/record.hpp" 7 | 8 | using wecall::variant::varPtr_t; 9 | 10 | boost::test_tools::predicate_result checkVariantInVector( const std::vector< varPtr_t > & variants, 11 | const varPtr_t & testVariant ); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /cpp/test/unittest/vcf/testField.cpp: -------------------------------------------------------------------------------- 1 | // All content Copyright (C) 2018 Genomics plc 2 | #define BOOST_TEST_DYN_LINK 3 | #include 4 | 5 | #include 6 | #include "vcf/field.hpp" 7 | 8 | BOOST_AUTO_TEST_CASE( testInfoFieldsContainNoDuplicates ) 9 | { 10 | const auto infoKeys = wecall::vcf::info::getVCFKeys( true ); 11 | const std::set< std::string > asSet( infoKeys.cbegin(), infoKeys.cend() ); 12 | BOOST_CHECK_EQUAL( asSet.size(), infoKeys.size() ); 13 | } 14 | 15 | BOOST_AUTO_TEST_CASE( testCanConstructAVCFInfoFieldForEachKey ) 16 | { 17 | const auto infoKeys = wecall::vcf::info::getVCFKeys( true ); 18 | for ( const auto & infoKey : infoKeys ) 19 | { 20 | const auto formatedField = wecall::vcf::Field::infoFieldFromID( infoKey ); 21 | BOOST_CHECK_EQUAL( formatedField.m_id, infoKey ); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /python/__init__.py: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/__init__.py
# --------------------------------------------------------------------------------
# /python/setup.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
# -*- coding: utf8 -*-

import setuptools

# Package metadata for the "wecall" Python helpers; pysam is the only
# declared runtime requirement.
setuptools.setup(
    name="wecall",
    url="www.genomicsplc.com",
    author="Genomics",
    author_email="help@genomicsplc.com",
    description="wecall",
    license="Genomics PLC Proprietary License",
    keywords="wecall",
    packages=setuptools.find_packages(),
    py_modules=['wecall'],
    install_requires=['pysam']
)
# --------------------------------------------------------------------------------
# /python/wecall/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/__init__.py
# --------------------------------------------------------------------------------
# /python/wecall/bamutils/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/bamutils/__init__.py
# --------------------------------------------------------------------------------
# /python/wecall/bamutils/sequence_position.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
from wecall.common.exceptions import weCallException

MATCHING_BASE = "."
DELETED_BASE = "*"
MISSING_BASE = " "


class SequencePosition(object):
    """
    Reference, sequence and quality character for a single position.

    All three characters must have length 1, the reference character may
    not be MISSING_BASE, and the combination is validated on construction.
    """

    def __init__(self, ref_char, seq_char, qual_char):
        self.__validate_input(ref_char, seq_char, qual_char)

        self.ref_char = ref_char
        self.seq_char = seq_char
        self.qual_char = qual_char
        # A position is a gap when the sequence character is MISSING_BASE.
        self.is_gap = self.__calculate_is_gap()

        self.__validate_character_combination()

    def update_ref_pos(self, ref_pos):
        """Return ref_pos unchanged when the reference character is
        DELETED_BASE, otherwise ref_pos + 1."""
        return ref_pos if self.ref_char == DELETED_BASE else ref_pos + 1

    def __calculate_is_gap(self):
        return self.seq_char == MISSING_BASE

    @staticmethod
    def __validate_input(ref_char, seq_char, qual_char):
        """Raise weCallException unless every character has length 1 and
        the reference character is present."""
        if any(len(c) != 1 for c in [ref_char, seq_char, qual_char]):
            raise weCallException(
                "All characters at sequence position has to be of length 1.")

        if ref_char == MISSING_BASE:
            raise weCallException("Missing reference character.")

    def __validate_character_combination(self):
        """Raise weCallException for combinations that cannot co-exist."""
        if self.ref_char == DELETED_BASE and self.seq_char == MATCHING_BASE:
            raise weCallException(
                "Invalid character combination: ref char = {}, sequence char = {}".format(
                    self.ref_char, self.seq_char))

        has_quality = self.qual_char != MISSING_BASE

        if has_quality and self.seq_char == DELETED_BASE:
            raise weCallException(
                "Cannot assign base quality to a deleted base.")
        if has_quality and self.is_gap:
            raise weCallException("Cannot assign base quality inside a gap.")
# --------------------------------------------------------------------------------
# /python/wecall/bedutils/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/bedutils/__init__.py
# --------------------------------------------------------------------------------
# /python/wecall/bedutils/bedwriter.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
from wecall.utils.tabix_indexer import TabixIndexer


def bed_line_from_chrom_interval(chrom_interval):
    """Return chrom, start and end of chrom_interval joined by tabs
    (no trailing newline)."""
    columns = (
        chrom_interval.chrom,
        chrom_interval.start,
        chrom_interval.end,
    )
    return "\t".join(map(str, columns))


class BEDWriter(object):
    """Writes newline-terminated BED lines to a caller-supplied stream."""

    def __init__(self, output_stream):
        self.__output_stream = output_stream

    def write_chrom_interval(self, chrom_interval):
        """Write one interval as a tab-separated line."""
        bed_line = bed_line_from_chrom_interval(chrom_interval)
        self.__output_stream.write(bed_line + "\n")

    def write_bed_record(self, bed_record):
        """Write str(bed_record) followed by a newline."""
        self.__output_stream.write(str(bed_record) + "\n")

    def write_chrom_intervals(self, chrom_intervals):
        """Write every interval of the iterable, in iteration order."""
        for interval in chrom_intervals:
            self.write_chrom_interval(interval)


class BEDWriterContextManager(object):
    """Context manager that opens filename for writing and hands out a
    BEDWriter bound to it; the file is closed on exit."""

    def __init__(self, filename):
        self.filename = filename

    def __enter__(self):
        self.fp = open(self.filename, 'w')
        # Attribute name kept as-is even though it holds a BEDWriter.
        self.vcf_writer = BEDWriter(self.fp)
        return self.vcf_writer

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.fp.close()


class BEDIndexer(TabixIndexer):
    """TabixIndexer preconfigured with the "bed" file type."""

    def __init__(self, filename):
        TabixIndexer.__init__(self, filename, "bed")
# --------------------------------------------------------------------------------
# /python/wecall/common/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/common/__init__.py
# --------------------------------------------------------------------------------
# /python/wecall/common/exceptions.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
"""
Basic exception classes to be used throughout the weCall code.
"""


class weCallException(Exception):
    """
    Base class for all exceptions. Everything we throw
    should derive from this.
    """

    def __init__(self, value):
        self.value = value
        self.message = value

    def __str__(self):
        return repr(self.value)


class weCallRuntimeException(weCallException):
    """
    Raised for a weCall run that exited with a non-zero return code.

    Exposes return_code and result, plus the same value / message
    attributes as the base class.
    """

    def __init__(self, return_code, result):
        self.return_code = return_code
        self.result = result
        self.value = "weCall exited with non-zero exit code {} for `{}`.".format(
            self.return_code, self.result)
        # The base class exposes `message` next to `value`; set it here as
        # well so handlers written against weCallException keep working.
        self.message = self.value
# --------------------------------------------------------------------------------
# /python/wecall/fastautils/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/fastautils/__init__.py
# --------------------------------------------------------------------------------
# /python/wecall/fastautils/fasta_file_builder.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
import os
import subprocess
from wecall.common.exceptions import weCallException
from wecall.genomics.reference_genome import InMemoryReferenceGenome
from wecall_test_drivers.tool_runner import ToolRunner


class FastaFileBuilder(object):
    """
    Collects chromosomes in an InMemoryReferenceGenome and writes them out
    as a FASTA file wrapped at line_length characters per sequence line.
    """

    def __init__(self, filename, line_length=80):
        assert line_length > 0
        self.filename = filename
        self.line_length = line_length
        self.__reference_genome = InMemoryReferenceGenome()

    def reference_genome(self):
        """Return the underlying InMemoryReferenceGenome."""
        return self.__reference_genome

    def with_chrom(self, chrom, sequence, pos_from=0):
        """Register a chromosome and return what the genome's with_chrom
        returns (not this builder)."""
        return self.__reference_genome.with_chrom(chrom, sequence, pos_from)

    def build(self):
        """Write every registered chromosome to self.filename; returns self."""
        with open(self.filename, "w") as fasta_file:
            for chrom_name in self.__reference_genome.chromosomes():
                sequence = self.__reference_genome.fetch(chrom_name)
                fasta_file.write(">{}\n".format(chrom_name))
                for offset in range(0, len(sequence), self.line_length):
                    line = sequence[offset:offset + self.line_length] + '\n'
                    fasta_file.write(line)

        return self

    def index(self):
        """
        Run `samtools faidx` (looked up under $WECALL_BIN) on the file.

        Returns self on success; raises weCallException when the tool
        runner reports a non-zero return code.
        """
        tool_runner = ToolRunner()
        tool_runner.start(
            [os.path.join(os.environ['WECALL_BIN'], "samtools"), "faidx", self.filename])

        if tool_runner.return_code != 0:
            # Previously raised with an empty message, which made failures
            # impossible to diagnose from the exception alone.
            raise weCallException(
                "samtools faidx exited with return code {} for {}".format(
                    tool_runner.return_code, self.filename))
        return self
# --------------------------------------------------------------------------------
# /python/wecall/genomics/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/genomics/__init__.py
# --------------------------------------------------------------------------------
# /python/wecall/genomics/chromosome.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
from wecall.common.exceptions import weCallException

# Define a standard ordering for chromosomes. N.B.
# We always use the naming convention "1",
# "2", "3" etc for internal use, not "chr1", "chr2", "chr3"

CHROMOSOME_LIST = [
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9',
    '10',
    '11',
    '12',
    '13',
    '14',
    '15',
    '16',
    '17',
    '18',
    '19',
    '20',
    '21',
    '22',
    'X',
    'Y',
    'MT']
# Maps each standard chromosome name to its position in CHROMOSOME_LIST.
CHROMOSOME_ORDER = {
    chrom: index for index,
    chrom in enumerate(CHROMOSOME_LIST)}


def chromosome_comp(lhs, rhs):
    """
    Less-than predicate for chromosome names.

    Names found in CHROMOSOME_ORDER compare by their index; every other
    name ranks after all standard ones. Equal ranks fall back to plain
    string comparison.
    """
    p_left = CHROMOSOME_ORDER.get(lhs, len(CHROMOSOME_ORDER))
    p_right = CHROMOSOME_ORDER.get(rhs, len(CHROMOSOME_ORDER))
    if p_left != p_right:
        return p_left < p_right
    else:
        return lhs < rhs


def standardise_chromosome(chrom):
    """Upper-case chrom, drop every "CHR" substring and leading zeros;
    a result of "M" is reported as "MT"."""
    stripped_chrom = chrom.upper().replace("CHR", "").lstrip('0')
    if stripped_chrom == "M":
        return "MT"
    else:
        return stripped_chrom


def add_chr(chrom):
    """Return chrom prefixed with "chr"."""
    return "chr{}".format(chrom)


def get_chromosome_index(chrom):
    """Return the CHROMOSOME_ORDER index of the standardised name; raises
    weCallException for names that are not in the standard list."""
    try:
        return CHROMOSOME_ORDER[standardise_chromosome(chrom)]
    except KeyError:
        raise weCallException("Invalid chromosome {}".format(chrom))
# --------------------------------------------------------------------------------
# /python/wecall/genomics/reference_chromosome.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
from wecall.utils.interval import ChromInterval
import re
from wecall.common.exceptions import weCallException
from wecall.genomics.chromosome import standardise_chromosome


DEFAULT_CHROM = "1"


class ReferenceChromosome(object):
    """
    A reference string placed at pos_from on chromosome chrom.

    ref_seq keeps the string as given (it may contain "*"); the copy with
    every "*" removed backs str(), indexing, pos_to and fasta_string().
    """

    def __init__(self, ref_string, pos_from=0, chrom=DEFAULT_CHROM):
        self.__validate_ref_seq(ref_string)

        self.chrom = chrom
        self.pos_from = pos_from
        self.ref_seq = ref_string
        self._ref_minus_deletions = self.ref_seq.replace("*", "")

    def __str__(self):
        return self._ref_minus_deletions

    def length_with_deletions(self):
        """Length of the reference string including "*" characters."""
        return len(self.ref_seq)

    def length_minus_deletions(self):
        """Length of the reference string once "*" characters are removed."""
        return len(self._ref_minus_deletions)

    def __getitem__(self, item):
        # NOTE(review): an item below pos_from yields a negative index and
        # therefore reads from the end of the string - confirm intended.
        return self._ref_minus_deletions[item - self.pos_from]

    @property
    def chrom_interval(self):
        return ChromInterval(self.chrom, self.pos_from, self.pos_to)

    @property
    def pos_to(self):
        return self.pos_from + len(self._ref_minus_deletions)

    def fasta_string(self):
        """Return pos_from "N" characters followed by the "*"-free sequence."""
        return self.pos_from * 'N' + self._ref_minus_deletions

    def __validate_ref_seq(self, ref_seq):
        """Raise weCallException unless ref_seq consists solely of the
        characters accepted by the pattern below."""
        if not re.match(r'^[ACGTURYKMSWBDHVN\*]*\Z', ref_seq):
            raise weCallException(
                "Illegal character in reference sequence {!r}".format(ref_seq))
# --------------------------------------------------------------------------------
# /python/wecall/genomics/reference_genome.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
from abc import ABCMeta, abstractmethod
from wecall.genomics.chromosome import CHROMOSOME_ORDER
from wecall.genomics.reference_chromosome import ReferenceChromosome


class AbstractReferenceGenome(object, metaclass=ABCMeta):
    """Interface for reference genomes: chromosome names, sequence fetch
    and chromosome length."""

    @abstractmethod
    def chromosomes(self):
        return []

    @abstractmethod
    def fetch(self, chrom, start, end):
        return ''

    @abstractmethod
    def get_chrom_length(self, chrom):
        return 0


class InMemoryReferenceGenome(AbstractReferenceGenome):
    """Reference genome held in a dict of name -> ReferenceChromosome."""

    def __init__(self):
        self.__data = {}

    def with_chrom(self, name, sequence, pos_from=0):
        """Store (or replace) chromosome name and return the new
        ReferenceChromosome."""
        reference_chrom = self.__data[name] = ReferenceChromosome(
            sequence, pos_from, name)
        return reference_chrom

    def chromosomes(self):
        """
        Return the stored chromosome names sorted by CHROMOSOME_ORDER.

        Names absent from CHROMOSOME_ORDER get key -1 and so sort first,
        keeping their insertion order among themselves.
        """
        # Use the dict default instead of `get(x) or -1`: index 0 (chromosome
        # "1") is falsy and was being collapsed to -1 together with unknown
        # names.
        return sorted(list(self.__data.keys()),
                      key=lambda x: CHROMOSOME_ORDER.get(x, -1))

    def get_chrom_length(self, chrom):
        """Return pos_to of the chromosome, i.e. pos_from plus the length
        of its "*"-free sequence."""
        return self.__data[chrom].pos_to

    def fetch(self, chrom, start=None, end=None):
        """
        Return fasta_string()[start:end] for chrom.

        start defaults to 0 and end to the chromosome length. Raises
        IndexError when the slice is shorter or longer than end - start.
        """
        if start is None:
            start = 0
        if end is None:
            end = self.get_chrom_length(chrom)

        seq = self.__data[chrom].fasta_string()[start:end]
        if end - start != len(seq):
            raise IndexError
        return seq
# --------------------------------------------------------------------------------
# /python/wecall/utils/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/utils/__init__.py
# --------------------------------------------------------------------------------
# /python/wecall/utils/tabix_indexer.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
import os
import subprocess

# Read once at import time; importing this module without WECALL_BIN set
# raises KeyError.
WECALL_BIN = os.environ["WECALL_BIN"]


class TabixIndexer(object):
    """Compresses a file with bgzip and indexes it with tabix, both taken
    from the WECALL_BIN directory."""

    def __init__(self, filename, file_type=None):
        self.filename = filename
        self.file_type = file_type

    @property
    def compressed_filename(self):
        return self.filename + ".gz"

    @property
    def compressed_filename_index(self):
        return self.compressed_filename + ".tbi"

    def bgzip(self):
        """Run `bgzip -f` on the file; returns self."""
        # NOTE(review): the return code is not checked here, unlike the
        # tabix call in index() which uses check_call.
        subprocess.call(
            [os.path.join(WECALL_BIN, "bgzip"), "-f", self.filename])
        return self

    def index(self):
        """
        Compress the file, then run `tabix -f` on the compressed file.

        file_type "VARINFO" selects explicit columns (-s 1 -b 2 -e 3); any
        other non-None file_type is passed as the -p preset. Raises
        subprocess.CalledProcessError when tabix exits non-zero.
        """
        self.bgzip()
        tabix_args = [os.path.join(WECALL_BIN, "tabix"), "-f", ]
        if self.file_type == "VARINFO":
            tabix_args += ['-s', '1', '-b', '2', '-e', '3']
        elif self.file_type is not None:
            tabix_args += ["-p", self.file_type]
        tabix_args.append(self.compressed_filename)
        subprocess.check_call(tabix_args)
        return self
# --------------------------------------------------------------------------------
# /python/wecall/utils/tabix_wrapper.py:
# --------------------------------------------------------------------------------
# All content Copyright (C) 2018 Genomics plc
from wecall.genomics.chromosome import standardise_chromosome
import pysam


class TabixWrapper(object):
    """
    Thin wrapper around pysam.Tabixfile, usable as a context manager.

    Keeps a mapping from standardised contig names to the names used in
    the file so that fetch_generator can translate the requested contig.
    """

    def __init__(self, tabix_filename):
        self.__tabix_file = pysam.Tabixfile(tabix_filename, 'r')
        self.__contig_mapping = {standardise_chromosome(
            contig): contig for contig in self.__tabix_file.contigs}

    @property
    def header(self):
        """Generator over the header lines of the tabix file."""
        return (line for line in self.__tabix_file.header)

    @property
    def contigs(self):
        """Contig names exactly as reported by the tabix file."""
        return self.__tabix_file.contigs

    def fetch_generator(self, chrom_interval):
        """
        Return an iterator over the records in chrom_interval.

        A chrom of None fetches the whole file. The chromosome name is
        translated through the contig mapping, falling back to the name as
        given. If the fetch raises ValueError an empty iterator is returned.
        """
        # Tabix will throw a ValueError if the chromosome specified is not
        # present in the index for this file.
        try:
            if chrom_interval.chrom is None:
                return self.__tabix_file.fetch()
            else:
                return self.__tabix_file.fetch(
                    self.__contig_mapping.get(
                        chrom_interval.chrom,
                        chrom_interval.chrom),
                    chrom_interval.interval.start,
                    chrom_interval.interval.end)
        except ValueError:
            # This is a plain function, not a generator: raising
            # StopIteration here would surface as an exception in the caller
            # (RuntimeError inside generators, PEP 479) instead of meaning
            # "no records".
            return iter(())

    def fetch_region(self, region):
        """Return an iterator over the records in the region string; empty
        when the fetch raises ValueError."""
        try:
            return self.__tabix_file.fetch(region=region)
        except ValueError:
            return iter(())

    def close(self):
        self.__tabix_file.close()

    def __enter__(self):
        return self

    def __exit__(self, ex_type, value, traceback):
        self.close()
# --------------------------------------------------------------------------------
# /python/wecall/vcfutils/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/vcfutils/__init__.py
-------------------------------------------------------------------------------- /python/wecall/vcfutils/stringutils.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | from wecall.common.exceptions import weCallException 3 | 4 | 5 | def to_vcf_str(primitive_type): 6 | if primitive_type is None: 7 | return "." 8 | elif isinstance(primitive_type, list) or isinstance(primitive_type, tuple): 9 | return ','.join(map(to_vcf_str, primitive_type)) 10 | elif isinstance(primitive_type, float): 11 | return "{:g}".format(primitive_type) 12 | else: 13 | return str(primitive_type) 14 | 15 | 16 | def from_vcf_str(vcf_str, desired_type): 17 | try: 18 | return desired_type(vcf_str) if vcf_str != "." else None 19 | except ValueError: 20 | raise weCallException( 21 | "Cannot cast {} to {!r}".format( 22 | vcf_str, desired_type)) 23 | -------------------------------------------------------------------------------- /python/wecall/wecall_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/python/wecall/wecall_utils/__init__.py -------------------------------------------------------------------------------- /python/wecall/wecall_utils/wecall_config_builder.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | class WecallConfig(object): 3 | 4 | def __init__(self, filename): 5 | self.filename = filename 6 | 7 | 8 | class ConfigFileWriter(object): 9 | 10 | def __init__(self, filename): 11 | self.__filename = filename 12 | 13 | def __enter__(self): 14 | self.__file = open(self.__filename, "w") 15 | return self 16 | 17 | def __exit__(self, exc_type, exc_val, exc_tb): 18 | self.__file.close() 19 | 20 | def write_config_line(self, key, value): 21 | self.__file.write("{} = 
{}\n".format(key, value)) 22 | 23 | 24 | class WecallConfigBuilder(object): 25 | 26 | def __init__(self, wecall_input_data, filestem): 27 | self.filestem = filestem 28 | self.__configuration = { 29 | "refFile": wecall_input_data.reference_filename, 30 | "inputs": ",".join(wecall_input_data.bam_filenames) 31 | } 32 | 33 | def with_configuration(self, key, value): 34 | self.__configuration[key] = value 35 | return self 36 | 37 | def build(self): 38 | filename = self.filestem + ".cfg" 39 | with ConfigFileWriter(filename) as config_writer: 40 | for key, value in list(self.__configuration.items()): 41 | config_writer.write_config_line(key=key, value=value) 42 | return WecallConfig(filename) 43 | -------------------------------------------------------------------------------- /python/wecall/wecall_utils/wecall_input_data.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | class InputData(object): 3 | 4 | def __init__(self, tags, filenames): 5 | self.tags = tags 6 | self.__filenames = filenames 7 | 8 | @property 9 | def filenames(self): 10 | return self.__filenames 11 | 12 | 13 | class WecallInputData(InputData): 14 | 15 | def __init__(self, bam_filenames, reference_filename): 16 | InputData.__init__(self, set(), set()) 17 | self.bam_filenames = bam_filenames 18 | self.reference_filename = reference_filename 19 | 20 | @property 21 | def filenames(self): 22 | filenames = set() 23 | for bam_filename in self.bam_filenames: 24 | filenames.update({bam_filename, bam_filename + ".bai"}) 25 | filenames.update( 26 | {self.reference_filename, self.reference_filename + ".fai"}) 27 | return filenames 28 | -------------------------------------------------------------------------------- /scripts/clang-reformat.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # All content Copyright (C) 2018 Genomics plc 3 | set -e -u -x -o pipefail -o 
errexit -o nounset 4 | 5 | clang-format-3.6 -i -style=file $(find "cpp" -name '*.[hc]pp') 6 | -------------------------------------------------------------------------------- /scripts/get-property.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | import json 3 | import sys 4 | 5 | 6 | def main(filename, property): 7 | with open(filename, 'r') as fp: 8 | props = json.load(fp) 9 | print(props[property]) 10 | 11 | 12 | if __name__ == "__main__": 13 | if len(sys.argv) < 3: 14 | raise Exception( 15 | "Usage:\n\t{exe} filename property".format( 16 | exe=sys.argv[0])) 17 | filename = sys.argv[1] 18 | property = sys.argv[2] 19 | main(filename, property) 20 | -------------------------------------------------------------------------------- /scripts/make-docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # All content Copyright (C) 2018 Genomics plc 3 | set -e -u -x -o pipefail 4 | 5 | echo $0 6 | PROJECT_SCRIPT=$( cd "$(dirname "$0")" && pwd -P) 7 | export PROJECT_HOME="${PROJECT_SCRIPT}/../" 8 | WECALL_BUILD="${PROJECT_HOME}/target/build" 9 | 10 | command -v pdflatex >/dev/null 2>&1 || { echo >&2 "Skipping Document generation - No pdflatex install found." 
; exit 0 ; } 11 | 12 | # generate some data 13 | "${WECALL_BUILD}/weCall" --help | python "${PROJECT_HOME}/scripts/help_to_latex.py" > "${PROJECT_HOME}/doc/wecall-params.tex" 14 | 15 | # make 16 | cd "$PROJECT_HOME/doc" 17 | pdflatex -interaction=nonstopmode -halt-on-error -output-directory "${WECALL_BUILD}/" "${PROJECT_HOME}/doc/weCall-userguide.tex" 18 | pdflatex -interaction=nonstopmode -halt-on-error -output-directory "${WECALL_BUILD}/" "${PROJECT_HOME}/doc/weCall-userguide.tex" 19 | -------------------------------------------------------------------------------- /scripts/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # All content Copyright (C) 2018 Genomics plc 3 | set -e -u -x -o pipefail 4 | 5 | PROJECT_SCRIPT=$( cd "$(dirname "$0")" && pwd -P) 6 | PROJECT_HOME="${PROJECT_SCRIPT}/../" 7 | export WECALL_TEST_RESULTS="${PROJECT_HOME}/target/test-results" 8 | export WECALL_BIN="${PROJECT_HOME}/target/build" 9 | 10 | mkdir -p "${WECALL_TEST_RESULTS}" 11 | 12 | "${WECALL_BIN}/unittest" | tee >> "${WECALL_TEST_RESULTS}/unittest.log" 13 | set +e +u 14 | . ${PROJECT_HOME}/env-wecall/bin/activate 15 | set -e -u 16 | 17 | pytest ${@} \ 18 | --flakes \ 19 | --junit-xml="${WECALL_TEST_RESULTS}/acceptance-test.xml" \ 20 | --cov wecall \ 21 | --cov wecall_test_drivers \ 22 | --cov-report term:skip-covered \ 23 | --cov-report xml:"${WECALL_TEST_RESULTS}/acceptance-test-coverage.xml" \ 24 | --cov-report html:"${WECALL_TEST_RESULTS}/acceptance_test_coverage_html" \ 25 | --no-cov-on-fail 26 | -------------------------------------------------------------------------------- /scripts/static-checks.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # All content Copyright (C) 2018 Genomics plc 3 | set -e -u -x -o pipefail 4 | 5 | # requires: cppcheck 6 | if ! 
command -v cppcheck > /dev/null; then 7 | echo "Cannot find 'cppcheck', try 'sudo apt-get install cppcheck'." 8 | exit 1 9 | fi 10 | 11 | # reformatting 12 | if python "$WECALL_SCRIPTS/clang-format-check.py"; then 13 | echo -e "\x1b[1;32mReformatting check passed.\x1b[0m" 14 | else 15 | echo -e "\x1b[1;31mReformatting check failed.\x1b[0m" 16 | exit 1 17 | fi 18 | 19 | # check the entire codebase & record the return code 20 | # Note: this doesn't check included files from the standard library or boost 21 | CPPCOPTS=(-j "$(nproc)" --force --quiet --inline-suppr -UDEBUG --language=c++) 22 | CPPCENABLES='--enable=warning,style,performance,portability,information,missingInclude' 23 | CPPCSUPPRESSIONS=(--suppress=*:*/dependencies/samtools/include/*) 24 | CPPCINCLUDES=(-I${WECALL_CPP_SOURCE} -I${WECALL_BUILD}/dependencies/samtools/include) 25 | cppcheck ${CPPCOPTS[*]} "$CPPCENABLES" ${CPPCSUPPRESSIONS[*]} ${CPPCINCLUDES[*]} ${WECALL_CPP} 2>&1 | python ${WECALL_SCRIPTS}/count-errors.py 26 | ERR=$? 
27 | 28 | if [ 0 -eq $ERR ]; then 29 | echo -e "\x1b[1;32mSCA passed.\x1b[0m" 30 | else 31 | echo -e "\x1b[1;31mSCA failed.\x1b[0m" 32 | fi 33 | 34 | # propagate any errors to the test driver 35 | exit $ERR 36 | 37 | -------------------------------------------------------------------------------- /test-drivers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test-drivers/__init__.py -------------------------------------------------------------------------------- /test-drivers/setup.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | # -*- coding: utf8 -*- 3 | 4 | import setuptools 5 | 6 | setuptools.setup( 7 | name="wecall-test-drivers", 8 | url="www.genomicsplc.com", 9 | author="Genomics", 10 | author_email="help@genomicsplc.com", 11 | description="wecall-test-drivers", 12 | license="Genomics PLC Proprietary License", 13 | keywords="wecall-test-drivers", 14 | packages=setuptools.find_packages(), 15 | py_modules=['wecall-test-drivers'], 16 | install_requires=[( 17 | 'pytest', 'pytest-cov', 'pytest-flakes', 'pytest-pep8', 'pytest-xdist', 18 | 'testfixtures', 'pysam', 'psutil')], 19 | ) 20 | -------------------------------------------------------------------------------- /test-drivers/wecall_test_drivers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test-drivers/wecall_test_drivers/__init__.py -------------------------------------------------------------------------------- /test-drivers/wecall_test_drivers/base_test.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | from logging import FileHandler, StreamHandler, 
DEBUG, INFO, getLogger, Formatter 3 | import shutil 4 | import unittest 5 | import os 6 | import sys 7 | 8 | 9 | class BaseTest(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.work_dir = os.path.join( 13 | os.environ["WECALL_TEST_RESULTS"], 14 | *self.id().split(".")) 15 | if os.path.exists(self.work_dir): 16 | shutil.rmtree(self.work_dir) 17 | os.makedirs(self.work_dir) 18 | 19 | logger = getLogger() 20 | logger.setLevel(DEBUG) 21 | fh = FileHandler(os.path.join(self.work_dir, "test.log")) 22 | ch = StreamHandler(sys.stdout) 23 | logger.addHandler(configure_log_handler(fh, DEBUG)) 24 | logger.addHandler(configure_log_handler(ch, INFO)) 25 | 26 | def tearDown(self): 27 | logger = getLogger() 28 | for handler in logger.handlers[:]: 29 | handler.close() 30 | logger.removeHandler(handler) 31 | 32 | 33 | def configure_log_handler(handler, level): 34 | handler.setLevel(level) 35 | formatter = Formatter('%(message)s') 36 | handler.setFormatter(formatter) 37 | return handler 38 | -------------------------------------------------------------------------------- /test-drivers/wecall_test_drivers/timed_command.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | from wecall_test_drivers.tool_runner import ToolRunner 3 | import json 4 | import logging 5 | import tempfile 6 | import psutil 7 | import time 8 | 9 | 10 | class TimedCommand(ToolRunner): 11 | 12 | def __init__(self): 13 | ToolRunner.__init__(self) 14 | self.user_time = None 15 | self.system_time = None 16 | 17 | @property 18 | def times(self): 19 | return {"user_time": self.user_time, "system_time": self.system_time} 20 | 21 | def dump_timing_json(self, filename): 22 | with open(filename, "w") as json_fp: 23 | json.dump(self.times, json_fp, indent=4, sort_keys=True) 24 | json_fp.write("\n") 25 | 26 | def log_output(self): 27 | ToolRunner.log_output(self) 28 | logging.info("user_time: {}".format(self.user_time)) 29 | 
logging.info("system_time: {}".format(self.system_time)) 30 | 31 | def run(self, command, cwd=None): 32 | with tempfile.TemporaryFile() as stdout, tempfile.TemporaryFile() as stderr: 33 | process = psutil.Popen( 34 | command, stdout=stdout, stderr=stderr, cwd=cwd) 35 | while process.status() != psutil.STATUS_ZOMBIE: 36 | time.sleep(0) 37 | stdout.seek(0) 38 | self.stdout = stdout.read() 39 | stderr.seek(0) 40 | self.stderr = stderr.read() 41 | 42 | times = process.cpu_times() 43 | self.user_time, self.system_time = times.user, times.system 44 | process.wait() 45 | self.return_code = process.returncode 46 | 47 | return self 48 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test/__init__.py -------------------------------------------------------------------------------- /test/test_style/test_wecall_pep8.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | import os 3 | import unittest 4 | 5 | import pep8 6 | 7 | 8 | class TestPep8(unittest.TestCase): 9 | """Run PEP8 on all files in this directory and subdirectories.""" 10 | 11 | def setUp(self): 12 | base_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( 13 | os.path.abspath(__file__))))) 14 | self.base_dirs = [os.path.join(base_dir, sub_dir) for sub_dir 15 | in ["python", "scripts", "test", "test_drivers"]] 16 | 17 | def test_pep8(self): 18 | style = pep8.StyleGuide() 19 | style.options.max_line_length = 120 # because it isn't 1928 anymore 20 | errors = 0 21 | 22 | for base_dir in self.base_dirs: 23 | for root, _, files in os.walk(base_dir): 24 | python_files = [f for f in files if f.endswith('.py')] 25 | for pf in python_files: 26 | check = style.check_files([os.path.join(root, pf)]) 27 
| errors += check.file_errors 28 | self.assertEqual(errors, 0) 29 | -------------------------------------------------------------------------------- /test/test_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test/test_utils/__init__.py -------------------------------------------------------------------------------- /test/test_utils/bamutils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test/test_utils/bamutils/__init__.py -------------------------------------------------------------------------------- /test/test_utils/bamutils/test_cigar.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | from unittest import TestCase 3 | from wecall.bamutils.cigar import Cigar 4 | 5 | 6 | class TestCigar(TestCase): 7 | def test_should_be_able_to_create_cigar_for_insertion(self): 8 | cigar = Cigar([(Cigar.INSERTION, 10)]) 9 | self.assertEqual(str(cigar), "10I") 10 | 11 | def test_should_be_able_to_create_cigar_for_deletion(self): 12 | cigar = Cigar([(Cigar.DELETION, 10)]) 13 | self.assertEqual(str(cigar), "10D") 14 | 15 | def test_should_be_able_to_create_cigar_for_match(self): 16 | cigar = Cigar([(Cigar.MATCH, 10)]) 17 | self.assertEqual(str(cigar), "10M") 18 | 19 | def test_should_reduce_cigars_correctly_on_construction(self): 20 | cigar = Cigar([(Cigar.MATCH, 10), (Cigar.MATCH, 7)]) 21 | self.assertEqual(str(cigar), "17M") 22 | 23 | def test_should_be_able_to_add_cigars(self): 24 | cigar_1 = Cigar([(Cigar.DELETION, 10), (Cigar.MATCH, 7)]) 25 | cigar_2 = Cigar([(Cigar.MATCH, 6), (Cigar.INSERTION, 7)]) 26 | 27 | self.assertEqual(str(cigar_1 + cigar_2), "10D13M7I") 28 | 
-------------------------------------------------------------------------------- /test/test_utils/bedutils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test/test_utils/bedutils/__init__.py -------------------------------------------------------------------------------- /test/test_utils/bedutils/test_bedwriter.py: -------------------------------------------------------------------------------- 1 | # All content Copyright (C) 2018 Genomics plc 2 | from unittest import TestCase 3 | from wecall.bedutils.bedrecord import BEDRecord 4 | from wecall.bedutils.bedwriter import bed_line_from_chrom_interval, BEDWriter 5 | from wecall.utils.interval import ChromInterval 6 | 7 | 8 | class MockStream(object): 9 | def __init__(self): 10 | self.lines = [] 11 | 12 | def write(self, line): 13 | self.lines.append(line) 14 | 15 | 16 | class TestBedLineFromChromInterval(TestCase): 17 | def test_should_write_tab_delimited_region(self): 18 | region = ChromInterval("20", 1, 2) 19 | 20 | output_line = bed_line_from_chrom_interval(region) 21 | self.assertEqual(output_line, "20\t1\t2") 22 | 23 | 24 | class TestBEDWriterWritesChromIntervals(TestCase): 25 | def test_should_write_line_to_stream(self): 26 | output_stream = MockStream() 27 | 28 | writer = BEDWriter(output_stream) 29 | writer.write_chrom_interval(ChromInterval("20", 1, 2)) 30 | 31 | # Then 32 | self.assertEqual(output_stream.lines, ["20\t1\t2\n"]) 33 | 34 | def test_should_write_bed_record_to_stream(self): 35 | output_stream = MockStream() 36 | 37 | writer = BEDWriter(output_stream) 38 | writer.write_bed_record(BEDRecord('1', 1, 2, None, 5, 'd', 'bah')) 39 | 40 | self.assertEqual(output_stream.lines, ['1\t1\t2\t.\t5\td\n']) 41 | -------------------------------------------------------------------------------- /test/test_utils/fastautils/__init__.py: 
class TestInMemoryReferenceGenome(TestCase):
    """Chromosome bookkeeping and sequence fetching of InMemoryReferenceGenome."""

    @staticmethod
    def _genome_with(*chrom_specs):
        # Each spec is the positional argument tuple for one with_chrom() call,
        # applied in the order given.
        genome = InMemoryReferenceGenome()
        for spec in chrom_specs:
            genome.with_chrom(*spec)
        return genome

    def test_should_not_get_duplicate_or_extra_chromosomes(self):
        genome = self._genome_with(("1", "", 0), ("1", "", 0), ("2", "", 0))

        self.assertEqual({"1", "2"}, set(genome.chromosomes()))

    def test_should_get_chromosomes_in_correct_order(self):
        genome = self._genome_with(("1", "", 0), ("3", "", 0), ("2", "", 0))

        self.assertEqual(['1', '2', '3'], genome.chromosomes())

    def test_should_get_total_chromosome_length(self):
        genome = self._genome_with(("1", "ATG", 11))

        self.assertEqual(14, genome.get_chrom_length("1"))

    def test_should_fetch_correct_sequence_with_padding(self):
        genome = self._genome_with(("1", "ATG", 11))

        self.assertEqual("NA", genome.fetch("1", 10, 12))

    def test_should_get_index_error_when_out_of_range(self):
        genome = self._genome_with(("1", "ATG", 11))

        with self.assertRaises(IndexError):
            print(genome.fetch("1", 100, 120))
class TestLowQualityFilter(BaseTest):
    """Checks when the 'LQ' filter is applied to a SNP called with min call quality 40."""

    def _snp_record_with_depth(self, depth):
        # Both tests share the reference, the SNP read and the reference read;
        # only the per-strand read count differs.
        chrom = 'chr1'
        svc = SVCDriver(self)

        svc.with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG", chrom=chrom
        ).with_read(
            "................G...........................",
            n_rev=depth, n_fwd=depth, chrom=chrom
        ).with_read(
            "............................................",
            n_rev=depth, n_fwd=depth, chrom=chrom
        ).with_min_call_qual(40)

        vcf = svc.call().with_output_vcf()
        return vcf.record_count(1).has_record_for_variant(Variant(chrom, 16, 'T', 'G'))

    def test_should_filter_low_quality_call(self):
        self._snp_record_with_depth(1).with_filters({'LQ'})

    def test_should_not_filter_high_quality_call(self):
        self._snp_record_with_depth(10).with_no_filters()
expect = svc.call(expected_success=False) 18 | expect.incorrect_var_ids_error("JONNY") 19 | 20 | def test_should_error_with_not_allowed_var_filter_ids(self): 21 | svc = SVCDriver(self) \ 22 | .with_var_filters("JONNY", "ANDY", "SB", "EDWARD", "STEFANIE") \ 23 | .with_verbosity(0) 24 | 25 | svc.with_ref_sequence( 26 | "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG" 27 | ).with_read( 28 | "................G...........................", 29 | ) 30 | expect = svc.call(expected_success=False) 31 | expect.incorrect_var_ids_error("JONNY", "ANDY", "EDWARD", "STEFANIE") 32 | -------------------------------------------------------------------------------- /test/wecall_acceptance/calling_using_skipped_sequence/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test/wecall_acceptance/calling_using_skipped_sequence/__init__.py -------------------------------------------------------------------------------- /test/wecall_acceptance/genotyping/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test/wecall_acceptance/genotyping/__init__.py -------------------------------------------------------------------------------- /test/wecall_acceptance/malformed_inputs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/test/wecall_acceptance/malformed_inputs/__init__.py -------------------------------------------------------------------------------- /test/wecall_acceptance/multi_sample_diploid/__init__.py: -------------------------------------------------------------------------------- 
class TestCallingWithPloidy1(BaseTest):
    """Acceptance tests for calling with ploidy set to 1."""

    def test_should_call_variants(self):
        chrom = 'chr1'
        sample_name = 'sample'
        svc = SVCDriver(self).with_ploidy(1)

        svc.with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTC***AACCCGTTACGTATGCATG", chrom=chrom
        ).with_read(
            "................G.....ATG.......***.........",
            n_rev=10, n_fwd=10, chrom=chrom, sample_name=sample_name
        )

        vcf = svc.call().with_output_vcf().record_count(3)

        # One SNP, one insertion and one deletion, each with a single-allele genotype.
        expected_calls = [
            (16, 'T', 'G'),
            (21, 'C', 'CATG'),
            (28, 'TTAC', 'T'),
        ]
        for pos, ref, alt in expected_calls:
            vcf.has_record_for_variant(Variant(chrom, pos, ref, alt)) \
                .with_sample(sample_name) \
                .has_genotype('1')

    def test_should_support_refcalls(self):
        # Use the shared reference-call ALT symbol instead of a per-file copy,
        # matching test_quality.py. The module-level copy in this file reads
        # as an empty string, which cannot identify a reference-call record.
        # NOTE(review): confirm vcf_expectation.ref_alt is the value intended here.
        from wecall_test_drivers.vcf_expectation import ref_alt

        chrom = 'chr1'
        sample_name = 'sample'
        svc = SVCDriver(self) \
            .with_ploidy(1) \
            .with_output_ref_calls(True)

        svc.with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTCTCAAACCCGTTACGTATGCATG", chrom=chrom
        ).with_read(
            "............................................",
            n_rev=10, n_fwd=10, chrom=chrom, sample_name=sample_name
        )

        vcf = svc.call().with_output_vcf().record_count(1)

        vcf.has_record_for_variant(Variant(chrom, 0, 'A', ref_alt)) \
            .with_sample(sample_name) \
            .has_genotype('0')
class TestRefCallingMaxRefCallSize(AsciiWecallRunnerTest):
    """Reference calls are split according to the maximum reference call size."""

    def test_splits_reference_call_into_three_records(self):
        # Use the shared reference-call ALT symbol instead of a per-file copy,
        # matching test_quality.py. The module-level copy in this file reads
        # as an empty string, which cannot identify a reference-call record.
        # NOTE(review): confirm vcf_expectation.ref_alt is the value intended here.
        from wecall_test_drivers.vcf_expectation import ref_alt

        chrom = "1"
        sample = "bah.asdhaslkdghalsdkfq25451c`52980biqweuo8!"

        driver = SVCDriver(self)
        # NOTE(review): the blank read is reproduced exactly as it appears in
        # this view; confirm its width against the repository copy.
        driver.with_ref_sequence(
            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom
        ).with_read(
            " ", sample_name=sample
        ).with_output_ref_calls(True).with_max_ref_call_size(20)

        vcf_expect = driver.call().with_output_vcf()

        vcf_expect.record_count(3)
        # One record is expected at each of these start positions.
        for position in (0, 20, 40):
            vcf_expect.has_record(chrom, position, "A", ref_alt) \
                .with_sample(sample) \
                .has_genotype("0/0")
class TestMNPCalling(AsciiWecallRunnerTest):
    """Acceptance tests for multi-nucleotide polymorphism calls."""

    @expectedFailure
    def test_should_call_mnp_at_the_end_of_read(self):
        reference = "AACCTTGGACGTTATTCTGTCAATGCATCCCATTGCCGCCGCAACCTTGGACGT***TATTCTGTC"
        reads = [" ...**.............*.....T...........A..T. ...**......CTG.........", ]

        stubs = set()
        stubs.add((3, "CTT", "C"))
        stubs.add((18, "GT", "G"))
        stubs.add((25, "C", "T"))
        # this MNP is not called and neither are the composing SNPs
        stubs.add((37, "GCCG", "ACCT"))
        stubs.add((45, "CTT", "C"))
        stubs.add((53, "T", "TCTG"))

        self.calls_variants(
            reference,
            reads,
            n_fwd=10, n_rev=10,
            expected_variant_stubs=stubs
        )

    def test_calls_mnp_formed_by_overlapping_reads(self):
        sn = "a_sample"
        reference = "AACCTTGGACGTTATTCTGTCAATGCATCCCATTGCCGCCGCAACCTTGGACGTTATTCTGTC"
        # Each read has a gap at a different place.
        overlapping_reads = (
            "..................T.. ....C.......C............................",
            "..................T.......C... ...C............................",
        )

        svc_driver = SVCDriver(self).with_allow_MNP_calls(True)
        configured = svc_driver.with_ref_sequence(reference, chrom="1")
        for read in overlapping_reads:
            configured = configured.with_read(read, sample_name=sn, n_fwd=3, n_rev=3)
        configured.with_output_phased_genotypes(True)

        mnp = Variant("1", 18, "GTCAATGCATCCCATTG", "TTCAATGCCTCCCATTC")
        record = svc_driver.call().with_output_vcf().has_record_for_variant(mnp)
        record.with_sample(sn).has_genotype("1|1")
# Builds the vendored libraries. Every entry in SUBDIRS is a directory that
# carries its own Makefile.
SUBDIRS = samtools tabix

# `clean` is declared phony as well, so a stray file named `clean` in this
# directory cannot mask the target.
.PHONY: subdirs clean $(SUBDIRS)

subdirs: $(SUBDIRS)

$(SUBDIRS):
	$(MAKE) --directory=$@

# Clean every sub-project in SUBDIRS order, stopping at the first failure.
# Driven by SUBDIRS so the list is not repeated here.
clean:
	for dir in $(SUBDIRS); do $(MAKE) --directory=$$dir clean || exit 1; done
RAZF supports 11 | arithmetic between file offsets, at the cost of increased index file 12 | size and the full compatibility with gzip. RAZF is optional and only 13 | used in `faidx' for indexing RAZF compressed fasta files. 14 | 15 | Colin Hercus updated novo2sam.pl to support gapped alignment by 16 | novoalign. 17 | 18 | Petr Danecek contributed the header parsing library sam_header.c and 19 | sam2vcf.pl script and added knet support to the RAZF library. 20 | 21 | -------------------------------------------------------------------------------- /vendor/samtools/COPYING: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2008-2009 Genome Research Ltd. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /vendor/samtools/INSTALL: -------------------------------------------------------------------------------- 1 | System Requirements 2 | =================== 3 | 4 | SAMtools depends on the zlib library <http://www.zlib.net>. Version 1.2.3+ is 5 | preferred and with 1.2.3+ you can compile razip and use it to compress a FASTA 6 | file. SAMtools' faidx is able to index a razip-compressed FASTA file to save 7 | diskspace. Older zlib also works with SAMtools, but razip cannot be compiled. 8 | 9 | The text-based viewer (tview) requires the GNU ncurses library 10 | <http://www.gnu.org/software/ncurses/>, which comes with Mac OS X and most of 11 | the modern Linux/Unix distributions. If you do not have this library installed, 12 | you can still compile the rest of SAMtools by manually changing: 13 | `-D_CURSES_LIB=1' to `-D_CURSES_LIB=0' at the line starting with `DFLAGS=', and 14 | comment out the line starting with `LIBCURSES='. 15 | 16 | 17 | Compilation 18 | =========== 19 | 20 | Type `make' to compile samtools. If you have zlib >= 1.2.2.1, you can compile 21 | razip with `make razip'. 22 | 23 | 24 | Installation 25 | ============ 26 | 27 | Copy `samtools', `bcftools/bcftools' and other executables/scripts in `misc' to 28 | a location you want (e.g. a directory in your $PATH). You may also copy 29 | `samtools.1' and `bcftools/bcftools.1' to a directory in your $MANPATH such 30 | that the `man' command may find the manual.
31 | -------------------------------------------------------------------------------- /vendor/samtools/bam_endian.h: -------------------------------------------------------------------------------- 1 | #ifndef BAM_ENDIAN_H 2 | #define BAM_ENDIAN_H 3 | 4 | #include 5 | 6 | static inline int bam_is_big_endian() 7 | { 8 | long one= 1; 9 | return !(*((char *)(&one))); 10 | } 11 | static inline uint16_t bam_swap_endian_2(uint16_t v) 12 | { 13 | return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); 14 | } 15 | static inline void *bam_swap_endian_2p(void *x) 16 | { 17 | *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); 18 | return x; 19 | } 20 | static inline uint32_t bam_swap_endian_4(uint32_t v) 21 | { 22 | v = ((v & 0x0000FFFFU) << 16) | (v >> 16); 23 | return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); 24 | } 25 | static inline void *bam_swap_endian_4p(void *x) 26 | { 27 | *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); 28 | return x; 29 | } 30 | static inline uint64_t bam_swap_endian_8(uint64_t v) 31 | { 32 | v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); 33 | v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); 34 | return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); 35 | } 36 | static inline void *bam_swap_endian_8p(void *x) 37 | { 38 | *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); 39 | return x; 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /vendor/samtools/bam_reheader.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "knetfile.h" 4 | #include "bgzf.h" 5 | #include "bam.h" 6 | 7 | #define BUF_SIZE 0x10000 8 | 9 | int bam_reheader(BGZF *in, const bam_header_t *h, int fd) 10 | { 11 | BGZF *fp; 12 | bam_header_t *old; 13 | int len; 14 | uint8_t *buf; 15 | if (in->is_write) return -1; 16 | buf = malloc(BUF_SIZE); 17 | old = 
bam_header_read(in); 18 | fp = bgzf_fdopen(fd, "w"); 19 | bam_header_write(fp, h); 20 | if (in->block_offset < in->block_length) { 21 | bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); 22 | bgzf_flush(fp); 23 | } 24 | #ifdef _USE_KNETFILE 25 | while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) 26 | fwrite(buf, 1, len, fp->fp); 27 | #else 28 | while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) 29 | fwrite(buf, 1, len, fp->file); 30 | #endif 31 | free(buf); 32 | fp->block_offset = in->block_offset = 0; 33 | bgzf_close(fp); 34 | return 0; 35 | } 36 | 37 | int main_reheader(int argc, char *argv[]) 38 | { 39 | bam_header_t *h; 40 | BGZF *in; 41 | if (argc != 3) { 42 | fprintf(stderr, "Usage: samtools reheader \n"); 43 | return 1; 44 | } 45 | { // read the header 46 | tamFile fph = sam_open(argv[1]); 47 | if (fph == 0) { 48 | fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]); 49 | return 1; 50 | } 51 | h = sam_header_read(fph); 52 | sam_close(fph); 53 | } 54 | in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r"); 55 | if (in == 0) { 56 | fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]); 57 | return 1; 58 | } 59 | bam_reheader(in, h, fileno(stdout)); 60 | bgzf_close(in); 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /vendor/samtools/bcftools/Makefile: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CFLAGS= -g -Wall -O2 #-m64 #-arch ppc 3 | DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE 4 | LOBJS= bcf.o vcf.o bcfutils.o prob1.o em.o kfunc.o kmin.o index.o fet.o mut.o bcf2qcall.o 5 | OMISC= .. 6 | AOBJS= call1.o main.o $(OMISC)/kstring.o $(OMISC)/bgzf.o $(OMISC)/knetfile.o $(OMISC)/bedidx.o 7 | PROG= bcftools 8 | INCLUDES= 9 | SUBDIRS= . 10 | 11 | .SUFFIXES:.c .o 12 | 13 | .c.o: 14 | $(CC) -c $(CFLAGS) $(DFLAGS) -I.. 
/*
   Copyright (c) 2008, 2010 by Attractive Chaos

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

#ifndef KMIN_H
#define KMIN_H

/* Tuning constants. Presumably meant as the r, eps and max_calls arguments
 * of kmin_hj() -- TODO confirm against the callers. */
#define KMIN_RADIUS  0.5
#define KMIN_EPS     1e-7
#define KMIN_MAXCALL 50000

/* Objective callback for kmin_hj(); presumably (dimension, point, user
 * data) -> value -- confirm against kmin.c. */
typedef double (*kmin_f)(int, double*, void*);

/* Objective callback for kmin_brent(); presumably (x, user data) -> value
 * -- confirm against kmin.c. */
typedef double (*kmin1_f)(double, void*);

#ifdef __cplusplus
extern "C" {
#endif

	/* Minimiser over an n-element vector x using callback `func`.
	 * NOTE(review): the name suggests Hooke-Jeeves direct search; the
	 * meaning of the return value and whether x is updated in place are
	 * not visible from this header. */
	double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls);

	/* One-dimensional minimiser using callback `func`, taking bounds or
	 * starting points a and b and a tolerance.
	 * NOTE(review): the name suggests Brent's method; *xmin presumably
	 * receives the location of the minimum -- confirm against kmin.c. */
	double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin);

#ifdef __cplusplus
}
#endif

#endif
29 | 30 | bcf_p1aux_t *bcf_p1_init(int n, uint8_t *ploidy); 31 | void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta); 32 | void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta); 33 | void bcf_p1_destroy(bcf_p1aux_t *ma); 34 | void bcf_p1_set_ploidy(bcf1_t *b, bcf_p1aux_t *ma); 35 | int bcf_p1_cal(const bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1rst_t *rst); 36 | int call_multiallelic_gt(bcf1_t *b, bcf_p1aux_t *ma, double threshold, int var_only); 37 | int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k); 38 | void bcf_p1_dump_afs(bcf_p1aux_t *ma); 39 | int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn); 40 | int bcf_p1_set_n1(bcf_p1aux_t *b, int n1); 41 | void bcf_p1_set_folded(bcf_p1aux_t *p1a); // only effective when set_n1() is not called 42 | 43 | int bcf_em1(const bcf1_t *b, int n1, int flag, double x[10]); 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /vendor/samtools/binary.yaml: -------------------------------------------------------------------------------- 1 | [ 2 | ["./samtools", "bin"], 3 | ["./libbam.so", "lib"], 4 | ["./bam2bcf.h", "include"], 5 | ["./bam_endian.h", "include"], 6 | ["./bam.h", "include"], 7 | ["./bam_tview.h", "include"], 8 | ["./bgzf.h", "include"], 9 | ["./errmod.h", "include"], 10 | ["./faidx.h", "include"], 11 | ["./kaln.h", "include"], 12 | ["./khash.h", "include"], 13 | ["./klist.h", "include"], 14 | ["./knetfile.h", "include"], 15 | ["./kprobaln.h", "include"], 16 | ["./kseq.h", "include"], 17 | ["./ksort.h", "include"], 18 | ["./kstring.h", "include"], 19 | ["./razf.h", "include"], 20 | ["./sam.h", "include"], 21 | ["./sam_header.h", "include"], 22 | ["./sample.h", "include"], 23 | ["./bcftools/bcf.h", "include/bcftools"], 24 | ["./bcftools/kmin.h", "include/bcftools"], 25 | ["./bcftools/prob1.h", "include/bcftools"], 26 | ["./win32/xcurses.h", "include/win32"], 27 | 
["./win32/zconf.h", "include/win32"], 28 | ["./win32/zlib.h", "include/win32"] 29 | ] 30 | -------------------------------------------------------------------------------- /vendor/samtools/deploy.yaml: -------------------------------------------------------------------------------- 1 | deployName: samtools 2 | -------------------------------------------------------------------------------- /vendor/samtools/errmod.h: -------------------------------------------------------------------------------- 1 | #ifndef ERRMOD_H 2 | #define ERRMOD_H 3 | 4 | #include 5 | 6 | struct __errmod_coef_t; 7 | 8 | typedef struct { 9 | double depcorr; 10 | struct __errmod_coef_t *coef; 11 | } errmod_t; 12 | 13 | errmod_t *errmod_init(float depcorr); 14 | void errmod_destroy(errmod_t *em); 15 | 16 | /* 17 | n: number of bases 18 | m: maximum base 19 | bases[i]: qual:6, strand:1, base:4 20 | q[i*m+j]: phred-scaled likelihood of (i,j) 21 | */ 22 | int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /vendor/samtools/examples/00README.txt: -------------------------------------------------------------------------------- 1 | File ex1.fa contains two sequences cut from the human genome 2 | build36. They were extracted with command: 3 | 4 | samtools faidx human_b36.fa 2:2043966-2045540 20:67967-69550 5 | 6 | Sequence names were changed manually for simplicity. File ex1.sam.gz 7 | contains MAQ alignments extracted with: 8 | 9 | (samtools view NA18507_maq.bam 2:2044001-2045500; 10 | samtools view NA18507_maq.bam 20:68001-69500) 11 | 12 | and processed with `samtools fixmate' to make it self-consistent as a 13 | standalone alignment. 
14 | 15 | To try samtools, you may run the following commands: 16 | 17 | samtools faidx ex1.fa # index the reference FASTA 18 | samtools import ex1.fa.fai ex1.sam.gz ex1.bam # SAM->BAM 19 | samtools index ex1.bam # index BAM 20 | samtools tview ex1.bam ex1.fa # view alignment 21 | samtools pileup -cf ex1.fa ex1.bam # pileup and consensus 22 | samtools pileup -cf ex1.fa -t ex1.fa.fai ex1.sam.gz 23 | 24 | -------------------------------------------------------------------------------- /vendor/samtools/examples/bam2bed.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "sam.h" 3 | static int fetch_func(const bam1_t *b, void *data) 4 | { 5 | samfile_t *fp = (samfile_t*)data; 6 | uint32_t *cigar = bam1_cigar(b); 7 | const bam1_core_t *c = &b->core; 8 | int i, l; 9 | if (b->core.tid < 0) return 0; 10 | for (i = l = 0; i < c->n_cigar; ++i) { 11 | int op = cigar[i]&0xf; 12 | if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP) 13 | l += cigar[i]>>4; 14 | } 15 | printf("%s\t%d\t%d\t%s\t%d\t%c\n", fp->header->target_name[c->tid], 16 | c->pos, c->pos + l, bam1_qname(b), c->qual, (c->flag&BAM_FREVERSE)? 
'-' : '+'); 17 | return 0; 18 | } 19 | int main(int argc, char *argv[]) 20 | { 21 | samfile_t *fp; 22 | if (argc == 1) { 23 | fprintf(stderr, "Usage: bam2bed [region]\n"); 24 | return 1; 25 | } 26 | if ((fp = samopen(argv[1], "rb", 0)) == 0) { 27 | fprintf(stderr, "bam2bed: Fail to open BAM file %s\n", argv[1]); 28 | return 1; 29 | } 30 | if (argc == 2) { /* if a region is not specified */ 31 | bam1_t *b = bam_init1(); 32 | while (samread(fp, b) >= 0) fetch_func(b, fp); 33 | bam_destroy1(b); 34 | } else { 35 | int ref, beg, end; 36 | bam_index_t *idx; 37 | if ((idx = bam_index_load(argv[1])) == 0) { 38 | fprintf(stderr, "bam2bed: BAM indexing file is not available.\n"); 39 | return 1; 40 | } 41 | bam_parse_region(fp->header, argv[2], &ref, &beg, &end); 42 | if (ref < 0) { 43 | fprintf(stderr, "bam2bed: Invalid region %s\n", argv[2]); 44 | return 1; 45 | } 46 | bam_fetch(fp->x.bam, idx, ref, beg, end, fp, fetch_func); 47 | bam_index_destroy(idx); 48 | } 49 | samclose(fp); 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /vendor/samtools/examples/calDepth.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "sam.h" 3 | 4 | typedef struct { 5 | int beg, end; 6 | samfile_t *in; 7 | } tmpstruct_t; 8 | 9 | // callback for bam_fetch() 10 | static int fetch_func(const bam1_t *b, void *data) 11 | { 12 | bam_plbuf_t *buf = (bam_plbuf_t*)data; 13 | bam_plbuf_push(b, buf); 14 | return 0; 15 | } 16 | // callback for bam_plbuf_init() 17 | static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) 18 | { 19 | tmpstruct_t *tmp = (tmpstruct_t*)data; 20 | if ((int)pos >= tmp->beg && (int)pos < tmp->end) 21 | printf("%s\t%d\t%d\n", tmp->in->header->target_name[tid], pos + 1, n); 22 | return 0; 23 | } 24 | 25 | int main(int argc, char *argv[]) 26 | { 27 | tmpstruct_t tmp; 28 | if (argc == 1) { 29 | fprintf(stderr, "Usage: calDepth 
[region]\n"); 30 | return 1; 31 | } 32 | tmp.beg = 0; tmp.end = 0x7fffffff; 33 | tmp.in = samopen(argv[1], "rb", 0); 34 | if (tmp.in == 0) { 35 | fprintf(stderr, "Fail to open BAM file %s\n", argv[1]); 36 | return 1; 37 | } 38 | if (argc == 2) { // if a region is not specified 39 | sampileup(tmp.in, -1, pileup_func, &tmp); 40 | } else { 41 | int ref; 42 | bam_index_t *idx; 43 | bam_plbuf_t *buf; 44 | idx = bam_index_load(argv[1]); // load BAM index 45 | if (idx == 0) { 46 | fprintf(stderr, "BAM indexing file is not available.\n"); 47 | return 1; 48 | } 49 | bam_parse_region(tmp.in->header, argv[2], &ref, &tmp.beg, &tmp.end); // parse the region 50 | if (ref < 0) { 51 | fprintf(stderr, "Invalid region %s\n", argv[2]); 52 | return 1; 53 | } 54 | buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup 55 | bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, fetch_func); 56 | bam_plbuf_push(0, buf); // finalize pileup 57 | bam_index_destroy(idx); 58 | bam_plbuf_destroy(buf); 59 | } 60 | samclose(tmp.in); 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /vendor/samtools/examples/ex1.sam.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/vendor/samtools/examples/ex1.sam.gz -------------------------------------------------------------------------------- /vendor/samtools/examples/toy.fa: -------------------------------------------------------------------------------- 1 | >ref 2 | AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT 3 | >ref2 4 | aggttttataaaacaattaagtctacagagcaactacgcg 5 | -------------------------------------------------------------------------------- /vendor/samtools/examples/toy.sam: -------------------------------------------------------------------------------- 1 | @SQ SN:ref LN:45 2 | @SQ SN:ref2 LN:40 3 | r001 163 ref 7 30 8M4I4M1D3M = 37 39 
TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 4 | r002 0 ref 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * 5 | r003 0 ref 9 30 5H6M * 0 0 AGCTAA * 6 | r004 0 ref 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * 7 | r003 16 ref 29 30 6H5M * 0 0 TAGGC * 8 | r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT * 9 | x1 0 ref2 1 30 20M * 0 0 aggttttataaaacaaataa ???????????????????? 10 | x2 0 ref2 2 30 21M * 0 0 ggttttataaaacaaataatt ????????????????????? 11 | x3 0 ref2 6 30 9M4I13M * 0 0 ttataaaacAAATaattaagtctaca ?????????????????????????? 12 | x4 0 ref2 10 30 25M * 0 0 CaaaTaattaagtctacagagcaac ????????????????????????? 13 | x5 0 ref2 12 30 24M * 0 0 aaTaattaagtctacagagcaact ???????????????????????? 14 | x6 0 ref2 14 30 23M * 0 0 Taattaagtctacagagcaacta ??????????????????????? 15 | -------------------------------------------------------------------------------- /vendor/samtools/knetfile.h: -------------------------------------------------------------------------------- 1 | #ifndef KNETFILE_H 2 | #define KNETFILE_H 3 | 4 | #include 5 | #include 6 | 7 | #ifndef _WIN32 8 | #define netread(fd, ptr, len) read(fd, ptr, len) 9 | #define netwrite(fd, ptr, len) write(fd, ptr, len) 10 | #define netclose(fd) close(fd) 11 | #else 12 | #include 13 | #define netread(fd, ptr, len) recv(fd, ptr, len, 0) 14 | #define netwrite(fd, ptr, len) send(fd, ptr, len, 0) 15 | #define netclose(fd) closesocket(fd) 16 | #endif 17 | 18 | // FIXME: currently I/O is unbuffered 19 | 20 | #define KNF_TYPE_LOCAL 1 21 | #define KNF_TYPE_FTP 2 22 | #define KNF_TYPE_HTTP 3 23 | 24 | typedef struct knetFile_s { 25 | int type, fd; 26 | int64_t offset; 27 | char *host, *port; 28 | 29 | // the following are for FTP only 30 | int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; 31 | char *response, *retr, *size_cmd; 32 | int64_t seek_offset; // for lazy seek 33 | int64_t file_size; 34 | 35 | // the following are for HTTP only 36 | char *path, *http_host; 37 | } knetFile; 38 | 39 | #define knet_tell(fp) 
((fp)->offset) 40 | #define knet_fileno(fp) ((fp)->fd) 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | #ifdef _WIN32 47 | int knet_win32_init(); 48 | void knet_win32_destroy(); 49 | #endif 50 | 51 | knetFile *knet_open(const char *fn, const char *mode); 52 | 53 | /* 54 | This only works with local files. 55 | */ 56 | knetFile *knet_dopen(int fd, const char *mode); 57 | 58 | /* 59 | If ->is_ready==0, this routine updates ->fd; otherwise, it simply 60 | reads from ->fd. 61 | */ 62 | off_t knet_read(knetFile *fp, void *buf, off_t len); 63 | 64 | /* 65 | This routine only sets ->offset and ->is_ready=0. It does not 66 | communicate with the FTP server. 67 | */ 68 | off_t knet_seek(knetFile *fp, int64_t off, int whence); 69 | int knet_close(knetFile *fp); 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /vendor/samtools/kprobaln.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2003-2006, 2008, 2009 by Heng Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | #ifndef LH3_KPROBALN_H_ 27 | #define LH3_KPROBALN_H_ 28 | 29 | #include 30 | 31 | typedef struct { 32 | float d, e; 33 | int bw; 34 | } kpa_par_t; 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual, 41 | const kpa_par_t *c, int *state, uint8_t *q); 42 | 43 | #ifdef __cplusplus 44 | } 45 | #endif 46 | 47 | extern kpa_par_t kpa_par_def, kpa_par_alt; 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /vendor/samtools/misc/Makefile: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CXX= g++ 3 | CFLAGS= -g -Wall -O2 #-m64 #-arch ppc 4 | CXXFLAGS= $(CFLAGS) 5 | DFLAGS= -D_FILE_OFFSET_BITS=64 6 | OBJS= 7 | PROG= md5sum-lite md5fa maq2sam-short maq2sam-long ace2sam wgsim bamcheck 8 | INCLUDES= -I.. 9 | SUBDIRS= . 10 | 11 | .SUFFIXES:.c .o 12 | 13 | .c.o: 14 | $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ 15 | 16 | all:$(PROG) 17 | 18 | lib-recur all-recur clean-recur cleanlocal-recur install-recur: 19 | @target=`echo $@ | sed s/-recur//`; \ 20 | wdir=`pwd`; \ 21 | list='$(SUBDIRS)'; for subdir in $$list; do \ 22 | cd $$subdir; \ 23 | $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ 24 | INCLUDES="$(INCLUDES)" $$target || exit 1; \ 25 | cd $$wdir; \ 26 | done; 27 | 28 | lib: 29 | 30 | bamcheck:bamcheck.o 31 | $(CC) $(CFLAGS) -o $@ bamcheck.o -L.. -lm -lbam -lpthread -lz 32 | 33 | bamcheck.o:bamcheck.c ../faidx.h ../khash.h ../sam.h ../razf.h 34 | $(CC) $(CFLAGS) -c -I.. 
-o $@ bamcheck.c 35 | 36 | ace2sam:ace2sam.o 37 | $(CC) $(CFLAGS) -o $@ ace2sam.o -lz 38 | 39 | wgsim:wgsim.o 40 | $(CC) $(CFLAGS) -o $@ wgsim.o -lm -lz 41 | 42 | md5fa:md5.o md5fa.o md5.h ../kseq.h 43 | $(CC) $(CFLAGS) -o $@ md5.o md5fa.o -lz 44 | 45 | md5sum-lite:md5sum-lite.o 46 | $(CC) $(CFLAGS) -o $@ md5sum-lite.o 47 | 48 | md5sum-lite.o:md5.c md5.h 49 | $(CC) -c $(CFLAGS) -DMD5SUM_MAIN -o $@ md5.c 50 | 51 | maq2sam-short:maq2sam.c 52 | $(CC) $(CFLAGS) -o $@ maq2sam.c -lz 53 | 54 | maq2sam-long:maq2sam.c 55 | $(CC) $(CFLAGS) -DMAQ_LONGREADS -o $@ maq2sam.c -lz 56 | 57 | md5fa.o:md5.h md5fa.c 58 | $(CC) $(CFLAGS) -c -I.. -o $@ md5fa.c 59 | 60 | wgsim.o:wgsim.c ../kseq.h 61 | $(CC) $(CFLAGS) -c -I.. -o $@ wgsim.c 62 | 63 | ace2sam.o:ace2sam.c ../kstring.h ../kseq.h 64 | $(CC) $(CFLAGS) -c -I.. -o $@ ace2sam.c 65 | 66 | cleanlocal: 67 | rm -fr gmon.out *.o a.out *.exe *.dSYM $(PROG) *~ *.a 68 | 69 | clean:cleanlocal-recur 70 | -------------------------------------------------------------------------------- /vendor/samtools/misc/md5.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is adapted from a program in this page: 3 | 4 | http://www.fourmilab.ch/md5/ 5 | 6 | The original source code does not work on 64-bit machines due to the 7 | wrong typedef "uint32". I also added prototypes. 8 | 9 | -lh3 10 | */ 11 | 12 | #ifndef MD5_H 13 | #define MD5_H 14 | 15 | /* The following tests optimise behaviour on little-endian 16 | machines, where there is no need to reverse the byte order 17 | of 32 bit words in the MD5 computation. By default, 18 | HIGHFIRST is defined, which indicates we're running on a 19 | big-endian (most significant byte first) machine, on which 20 | the byteReverse function in md5.c must be invoked. However, 21 | byteReverse is coded in such a way that it is an identity 22 | function when run on a little-endian machine, so calling it 23 | on such a platform causes no harm apart from wasting time. 
24 | If the platform is known to be little-endian, we speed 25 | things up by undefining HIGHFIRST, which defines 26 | byteReverse as a null macro. Doing things in this manner 27 | ensures we work on new platforms regardless of their byte 28 | order. */ 29 | 30 | #define HIGHFIRST 31 | 32 | #if __LITTLE_ENDIAN__ != 0 33 | #undef HIGHFIRST 34 | #endif 35 | 36 | #include 37 | 38 | struct MD5Context { 39 | uint32_t buf[4]; 40 | uint32_t bits[2]; 41 | unsigned char in[64]; 42 | }; 43 | 44 | void MD5Init(struct MD5Context *ctx); 45 | void MD5Update(struct MD5Context *ctx, unsigned char *buf, unsigned len); 46 | void MD5Final(unsigned char digest[16], struct MD5Context *ctx); 47 | 48 | /* 49 | * This is needed to make RSAREF happy on some MS-DOS compilers. 50 | */ 51 | typedef struct MD5Context MD5_CTX; 52 | 53 | /* Define CHECK_HARDWARE_PROPERTIES to have main.c verify 54 | byte order and uint32_t settings. */ 55 | #define CHECK_HARDWARE_PROPERTIES 56 | 57 | #endif /* !MD5_H */ 58 | -------------------------------------------------------------------------------- /vendor/samtools/misc/md5fa.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "md5.h" 4 | #include "kseq.h" 5 | 6 | #define HEX_STR "0123456789abcdef" 7 | 8 | KSEQ_INIT(gzFile, gzread) 9 | 10 | static void md5_one(const char *fn) 11 | { 12 | MD5_CTX md5_one, md5_all; 13 | int l, i, k; 14 | gzFile fp; 15 | kseq_t *seq; 16 | unsigned char unordered[16], digest[16]; 17 | 18 | for (l = 0; l < 16; ++l) unordered[l] = 0; 19 | fp = strcmp(fn, "-")? 
gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); 20 | if (fp == 0) { 21 | fprintf(stderr, "md5fa: %s: No such file or directory\n", fn); 22 | exit(1); 23 | } 24 | 25 | MD5Init(&md5_all); 26 | seq = kseq_init(fp); 27 | while ((l = kseq_read(seq)) >= 0) { 28 | for (i = k = 0; i < seq->seq.l; ++i) { 29 | if (islower(seq->seq.s[i])) seq->seq.s[k++] = toupper(seq->seq.s[i]); 30 | else if (isupper(seq->seq.s[i])) seq->seq.s[k++] = seq->seq.s[i]; 31 | } 32 | MD5Init(&md5_one); 33 | MD5Update(&md5_one, (unsigned char*)seq->seq.s, k); 34 | MD5Final(digest, &md5_one); 35 | for (l = 0; l < 16; ++l) { 36 | printf("%c%c", HEX_STR[digest[l]>>4&0xf], HEX_STR[digest[l]&0xf]); 37 | unordered[l] ^= digest[l]; 38 | } 39 | printf(" %s %s\n", fn, seq->name.s); 40 | MD5Update(&md5_all, (unsigned char*)seq->seq.s, k); 41 | } 42 | MD5Final(digest, &md5_all); 43 | kseq_destroy(seq); 44 | for (l = 0; l < 16; ++l) 45 | printf("%c%c", HEX_STR[digest[l]>>4&0xf], HEX_STR[digest[l]&0xf]); 46 | printf(" %s >ordered\n", fn); 47 | for (l = 0; l < 16; ++l) 48 | printf("%c%c", HEX_STR[unordered[l]>>4&0xf], HEX_STR[unordered[l]&0xf]); 49 | printf(" %s >unordered\n", fn); 50 | } 51 | 52 | int main(int argc, char *argv[]) 53 | { 54 | int i; 55 | if (argc == 1) md5_one("-"); 56 | else for (i = 1; i < argc; ++i) md5_one(argv[i]); 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /vendor/samtools/sam_header.h: -------------------------------------------------------------------------------- 1 | #ifndef __SAM_HEADER_H__ 2 | #define __SAM_HEADER_H__ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void *sam_header_parse2(const char *headerText); 9 | void *sam_header_merge(int n, const void **dicts); 10 | void sam_header_free(void *header); 11 | char *sam_header_write(const void *headerDict); // returns a newly allocated string 12 | 13 | /* 14 | // Usage example 15 | const char *key, *val; 16 | void *iter = 
sam_header_parse2(bam->header->text); 17 | while ( iter = sam_header2key_val(iter, "RG","ID","SM", &key,&val) ) printf("%s\t%s\n", key,val); 18 | */ 19 | void *sam_header2key_val(void *iter, const char type[2], const char key_tag[2], const char value_tag[2], const char **key, const char **value); 20 | char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n); 21 | 22 | /* 23 | // Usage example 24 | int i, j, n; 25 | const char *tags[] = {"SN","LN","UR","M5",NULL}; 26 | void *dict = sam_header_parse2(bam->header->text); 27 | char **tbl = sam_header2tbl_n(h->dict, "SQ", tags, &n); 28 | for (i=0; i 5 | 6 | static inline int bam_is_big_endian() 7 | { 8 | long one= 1; 9 | return !(*((char *)(&one))); 10 | } 11 | static inline uint16_t bam_swap_endian_2(uint16_t v) 12 | { 13 | return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); 14 | } 15 | static inline void *bam_swap_endian_2p(void *x) 16 | { 17 | *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); 18 | return x; 19 | } 20 | static inline uint32_t bam_swap_endian_4(uint32_t v) 21 | { 22 | v = ((v & 0x0000FFFFU) << 16) | (v >> 16); 23 | return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); 24 | } 25 | static inline void *bam_swap_endian_4p(void *x) 26 | { 27 | *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); 28 | return x; 29 | } 30 | static inline uint64_t bam_swap_endian_8(uint64_t v) 31 | { 32 | v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); 33 | v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); 34 | return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); 35 | } 36 | static inline void *bam_swap_endian_8p(void *x) 37 | { 38 | *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); 39 | return x; 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /vendor/tabix/binary.yaml: -------------------------------------------------------------------------------- 1 | [ 2 | 
["bgzip", "bin"], 3 | ["tabix", "bin"], 4 | ["libtabix.so", "lib"], 5 | ["ksort.h", "include"], 6 | ["tabix.h", "include"], 7 | ["bgzf.h", "include"], 8 | ["kseq.h", "include"], 9 | ["khash.h", "include"], 10 | ["knetfile.h", "include"], 11 | ["kstring.h", "include"], 12 | ["bam_endian.h", "include"] 13 | ] 14 | -------------------------------------------------------------------------------- /vendor/tabix/deploy.yaml: -------------------------------------------------------------------------------- 1 | deployName: tabix 2 | -------------------------------------------------------------------------------- /vendor/tabix/example.gtf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/vendor/tabix/example.gtf.gz -------------------------------------------------------------------------------- /vendor/tabix/example.gtf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genomics-dev/wecall/b079d0d0787b94938ff4488224c3900a3c9c6731/vendor/tabix/example.gtf.gz.tbi -------------------------------------------------------------------------------- /vendor/tabix/knetfile.h: -------------------------------------------------------------------------------- 1 | #ifndef KNETFILE_H 2 | #define KNETFILE_H 3 | 4 | #include 5 | #include 6 | 7 | #ifndef _WIN32 8 | #define netread(fd, ptr, len) read(fd, ptr, len) 9 | #define netwrite(fd, ptr, len) write(fd, ptr, len) 10 | #define netclose(fd) close(fd) 11 | #else 12 | #include 13 | #define netread(fd, ptr, len) recv(fd, ptr, len, 0) 14 | #define netwrite(fd, ptr, len) send(fd, ptr, len, 0) 15 | #define netclose(fd) closesocket(fd) 16 | #endif 17 | 18 | // FIXME: currently I/O is unbuffered 19 | 20 | #define KNF_TYPE_LOCAL 1 21 | #define KNF_TYPE_FTP 2 22 | #define KNF_TYPE_HTTP 3 23 | 24 | typedef struct knetFile_s { 25 | int type, fd; 26 | 
int64_t offset; 27 | char *host, *port; 28 | 29 | // the following are for FTP only 30 | int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; 31 | char *response, *retr, *size_cmd; 32 | int64_t seek_offset; // for lazy seek 33 | int64_t file_size; 34 | 35 | // the following are for HTTP only 36 | char *path, *http_host; 37 | } knetFile; 38 | 39 | #define knet_tell(fp) ((fp)->offset) 40 | #define knet_fileno(fp) ((fp)->fd) 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | #ifdef _WIN32 47 | int knet_win32_init(); 48 | void knet_win32_destroy(); 49 | #endif 50 | 51 | knetFile *knet_open(const char *fn, const char *mode); 52 | 53 | /* 54 | This only works with local files. 55 | */ 56 | knetFile *knet_dopen(int fd, const char *mode); 57 | 58 | /* 59 | If ->is_ready==0, this routine updates ->fd; otherwise, it simply 60 | reads from ->fd. 61 | */ 62 | off_t knet_read(knetFile *fp, void *buf, off_t len); 63 | 64 | /* 65 | This routine only sets ->offset and ->is_ready=0. It does not 66 | communicate with the FTP server. 
67 | */ 68 | off_t knet_seek(knetFile *fp, int64_t off, int whence); 69 | int knet_close(knetFile *fp); 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /vendor/tabix/kstring.h: -------------------------------------------------------------------------------- 1 | #ifndef KSTRING_H 2 | #define KSTRING_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef kroundup32 9 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 10 | #endif 11 | 12 | #ifndef KSTRING_T 13 | #define KSTRING_T kstring_t 14 | typedef struct __kstring_t { 15 | size_t l, m; 16 | char *s; 17 | } kstring_t; 18 | #endif 19 | 20 | int ksprintf(kstring_t *s, const char *fmt, ...); 21 | int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); 22 | 23 | // calculate the auxiliary array, allocated by calloc() 24 | int *ksBM_prep(const uint8_t *pat, int m); 25 | 26 | /* Search pat in str and returned the list of matches. The size of the 27 | * list is returned as n_matches. _prep is the array returned by 28 | * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. 
*/ 29 | int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches); 30 | 31 | static inline int kputsn(const char *p, int l, kstring_t *s) 32 | { 33 | if (s->l + l + 1 >= s->m) { 34 | s->m = s->l + l + 2; 35 | kroundup32(s->m); 36 | s->s = (char*)realloc(s->s, s->m); 37 | } 38 | strncpy(s->s + s->l, p, l); 39 | s->l += l; 40 | s->s[s->l] = 0; 41 | return l; 42 | } 43 | 44 | static inline int kputs(const char *p, kstring_t *s) 45 | { 46 | return kputsn(p, strlen(p), s); 47 | } 48 | 49 | static inline int kputc(int c, kstring_t *s) 50 | { 51 | if (s->l + 1 >= s->m) { 52 | s->m = s->l + 2; 53 | kroundup32(s->m); 54 | s->s = (char*)realloc(s->s, s->m); 55 | } 56 | s->s[s->l++] = c; 57 | s->s[s->l] = 0; 58 | return c; 59 | } 60 | 61 | static inline int *ksplit(kstring_t *s, int delimiter, int *n) 62 | { 63 | int max = 0, *offsets = 0; 64 | *n = ksplit_core(s->s, delimiter, &max, &offsets); 65 | return offsets; 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /vendor/tabix/perl/MANIFEST: -------------------------------------------------------------------------------- 1 | MANIFEST 2 | typemap 3 | Tabix.xs 4 | Tabix.pm 5 | TabixIterator.pm 6 | Makefile.PL 7 | t/01local.t 8 | t/02remote.t -------------------------------------------------------------------------------- /vendor/tabix/perl/Makefile.PL: -------------------------------------------------------------------------------- 1 | use ExtUtils::MakeMaker; 2 | WriteMakefile( 3 | NAME => 'Tabix', 4 | VERSION_FROM => 'Tabix.pm', 5 | LIBS => ['-lz -L.. 
-ltabix'], 6 | DEFINE => '-D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE', 7 | INC => '-I..', 8 | ); 9 | -------------------------------------------------------------------------------- /vendor/tabix/perl/Tabix.pm: -------------------------------------------------------------------------------- 1 | package Tabix; 2 | 3 | use strict; 4 | use warnings; 5 | use Carp qw/croak/; 6 | 7 | use TabixIterator; 8 | 9 | require Exporter; 10 | 11 | our @ISA = qw/Exporter/; 12 | our @EXPORT = qw/tabix_open tabix_close tabix_read tabix_query tabix_getnames tabix_iter_free/; 13 | 14 | our $VERSION = '0.2.0'; 15 | 16 | require XSLoader; 17 | XSLoader::load('Tabix', $VERSION); 18 | 19 | sub new { 20 | my $invocant = shift; 21 | my %args = @_; 22 | $args{-data} || croak("-data argument required"); 23 | my $class = ref($invocant) || $invocant; 24 | my $self = {}; 25 | bless($self, $class); 26 | $self->open($args{-data}, $args{-index}); 27 | return $self; 28 | } 29 | 30 | sub open { 31 | my ($self, $fn, $fnidx) = @_; 32 | $self->close; 33 | $self->{_fn} = $fn; 34 | $self->{_fnidx} = $fnidx; 35 | $self->{_} = $fnidx? 
tabix_open($fn, $fnidx) : tabix_open($fn); 36 | } 37 | 38 | sub close { 39 | my $self = shift; 40 | if ($self->{_}) { 41 | tabix_close($self->{_}); 42 | delete($self->{_}); delete($self->{_fn}); delete($self->{_fnidx}); 43 | } 44 | } 45 | 46 | sub DESTROY { 47 | my $self = shift; 48 | $self->close; 49 | } 50 | 51 | sub query { 52 | my $self = shift; 53 | my $iter; 54 | if (@_) { 55 | $iter = tabix_query($self->{_}, @_); 56 | } else { 57 | $iter = tabix_query($self->{_}); 58 | } 59 | my $i = TabixIterator->new; 60 | $i->set($iter); 61 | return $i; 62 | } 63 | 64 | sub read { 65 | my $self = shift; 66 | my $iter = shift; 67 | return tabix_read($self->{_}, $iter->get); 68 | } 69 | 70 | sub getnames { 71 | my $self = shift; 72 | return tabix_getnames($self->{_}); 73 | } 74 | 75 | 1; 76 | __END__ 77 | -------------------------------------------------------------------------------- /vendor/tabix/perl/Tabix.xs: -------------------------------------------------------------------------------- 1 | #include "EXTERN.h" 2 | #include "perl.h" 3 | #include "XSUB.h" 4 | 5 | #include 6 | #include "tabix.h" 7 | 8 | MODULE = Tabix PACKAGE = Tabix 9 | 10 | tabix_t* 11 | tabix_open(fn, fnidx=0) 12 | char *fn 13 | char *fnidx 14 | CODE: 15 | RETVAL = ti_open(fn, fnidx); 16 | OUTPUT: 17 | RETVAL 18 | 19 | void 20 | tabix_close(t) 21 | tabix_t *t 22 | CODE: 23 | ti_close(t); 24 | 25 | ti_iter_t 26 | tabix_query(t, seq=0, beg=0, end=0x7fffffff) 27 | tabix_t *t 28 | const char *seq 29 | int beg 30 | int end 31 | PREINIT: 32 | CODE: 33 | RETVAL = ti_query(t, seq, beg, end); 34 | OUTPUT: 35 | RETVAL 36 | 37 | SV* 38 | tabix_read(t, iter) 39 | tabix_t *t 40 | ti_iter_t iter 41 | PREINIT: 42 | const char *s; 43 | int len; 44 | CODE: 45 | s = ti_read(t, iter, &len); 46 | if (s == 0) 47 | return XSRETURN_EMPTY; 48 | RETVAL = newSVpv(s, len); 49 | OUTPUT: 50 | RETVAL 51 | 52 | void 53 | tabix_getnames(t) 54 | tabix_t *t 55 | PREINIT: 56 | const char **names; 57 | int i, n; 58 | PPCODE: 59 | 
ti_lazy_index_load(t); 60 | names = ti_seqname(t->idx, &n); 61 | for (i = 0; i < n; ++i) 62 | XPUSHs(sv_2mortal(newSVpv(names[i], 0))); 63 | free(names); 64 | 65 | MODULE = Tabix PACKAGE = TabixIterator 66 | 67 | void 68 | tabix_iter_free(iter) 69 | ti_iter_t iter 70 | CODE: 71 | ti_iter_destroy(iter); 72 | -------------------------------------------------------------------------------- /vendor/tabix/perl/TabixIterator.pm: -------------------------------------------------------------------------------- 1 | package TabixIterator; 2 | 3 | use strict; 4 | use warnings; 5 | use Carp qw/croak/; 6 | 7 | require Exporter; 8 | 9 | our @ISA = qw/Exporter/; 10 | our @EXPORT = qw/tabix_iter_free/; 11 | 12 | our $VERSION = '0.2.0'; 13 | 14 | require XSLoader; 15 | XSLoader::load('Tabix', $VERSION); 16 | 17 | sub new { 18 | my $invocant = shift; 19 | my $class = ref($invocant) || $invocant; 20 | my $self = {}; 21 | bless($self, $class); 22 | return $self; 23 | } 24 | 25 | sub set { 26 | my ($self, $iter) = @_; 27 | $self->{_} = $iter; 28 | } 29 | 30 | sub get { 31 | my $self = shift; 32 | return $self->{_}; 33 | } 34 | 35 | sub DESTROY { 36 | my $self = shift; 37 | tabix_iter_free($self->{_}) if ($self->{_}); 38 | } 39 | 40 | 1; 41 | __END__ 42 | -------------------------------------------------------------------------------- /vendor/tabix/perl/t/01local.t: -------------------------------------------------------------------------------- 1 | #-*-Perl-*- 2 | use Test::More tests => 9; 3 | BEGIN { use_ok('Tabix') }; 4 | 5 | { # C-like low-level interface 6 | my $t = tabix_open("../example.gtf.gz"); 7 | ok($t); 8 | my $iter = tabix_query($t, "chr1", 0, 2000); 9 | ok($iter); 10 | $_ = 0; 11 | ++$_ while (tabix_read($t, $iter)); 12 | is($_, 6); 13 | tabix_iter_free($iter); 14 | @_ = tabix_getnames($t); 15 | is(scalar(@_), 2); 16 | } 17 | 18 | { # OOP high-level interface 19 | my $t = Tabix->new(-data=>"../example.gtf.gz"); 20 | ok($t); 21 | my $iter = $t->query("chr1", 3000, 5000); 
22 |     ok($iter, 'query returns an iterator object');
23 |     my $count = 0; # lexical counter instead of clobbering the global $_
24 |     ++$count while (defined($t->read($iter))); # stop on "no record", not on a false-looking record
25 |     is($count, 27, 'record count for chr1 3000-5000');
26 |     my @names = $t->getnames;
27 |     is($names[1], "chr2", 'second sequence name');
28 | }
29 |
--------------------------------------------------------------------------------
/vendor/tabix/perl/t/02remote.t:
--------------------------------------------------------------------------------
1 | #-*-Perl-*-
2 | use strict;
3 | use warnings;
4 | use Test::More tests => 9;
5 | BEGIN { use_ok('Tabix') };
6 |
7 | # Both blocks read the same remote file; the second also names its index explicitly.
8 | my $data_url = "ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz";
9 |
10 | { # FTP access
11 |     my $t = Tabix->new(-data=>$data_url);
12 |     ok($t, 'open remote data file');
13 |     my $iter = $t->query("1", 1000000, 1100000);
14 |     ok($iter, 'query returns an iterator object');
15 |     my $count = 0;
16 |     ++$count while (defined($t->read($iter)));
17 |     is($count, 306, 'record count for 1 1000000-1100000');
18 |     my @names = $t->getnames;
19 |     is(scalar(@names), 22, 'number of sequence names');
20 | }
21 |
22 | { # FTP access plus FTP index
23 |     my $t = Tabix->new(-data=>$data_url,
24 |                        -index=>"$data_url.tbi");
25 |     ok($t, 'open remote data file with explicit index');
26 |     my $iter = $t->query("19", 10000000, 10100000);
27 |     ok($iter, 'query returns an iterator object');
28 |     my $count = 0;
29 |     ++$count while (defined($t->read($iter)));
30 |     is($count, 268, 'record count for 19 10000000-10100000');
31 |     my @names = $t->getnames;
32 |     is(scalar(@names), 22, 'number of sequence names');
33 | }
34 |
--------------------------------------------------------------------------------
/vendor/tabix/perl/typemap:
--------------------------------------------------------------------------------
1 | TYPEMAP
2 | tabix_t* T_PTROBJ
3 | ti_iter_t T_PTROBJ
--------------------------------------------------------------------------------
/vendor/tabix/version.yaml:
--------------------------------------------------------------------------------
1 | major: 0
2 | minor: 2
3 | patch: 6
4 |
--------------------------------------------------------------------------------