├── tests ├── vcfcov │ ├── calls2 │ │ ├── sample.fa │ │ └── Makefile │ ├── calls0 │ │ ├── calls.vcf │ │ ├── truth.cov.vcf │ │ └── Makefile │ ├── ref │ │ └── ref.fa │ ├── calls3 │ │ ├── hardy.fa │ │ ├── laurel.fa │ │ └── calls.vcf │ ├── calls4 │ │ ├── calls.vcf │ │ ├── pluto.fa │ │ └── truth.cov.vcf │ ├── calls5 │ │ ├── calls.vcf │ │ └── truth.cov.vcf │ ├── calls1 │ │ ├── jane.fa │ │ ├── calls.vcf │ │ ├── john.fa │ │ └── Makefile │ └── Makefile ├── bubbles │ ├── bubbles5 │ │ └── truth.vcf │ ├── Makefile │ └── bubbles2 │ │ └── Makefile ├── pjoin │ ├── Makefile │ ├── pjoin1 │ │ └── Makefile │ └── pjoin0 │ │ └── Makefile ├── pop_bubbles │ ├── Makefile │ ├── pop_bubbles1 │ │ └── Makefile │ └── pop_bubbles2 │ │ └── Makefile ├── build │ └── Makefile ├── clean_graph │ ├── Makefile │ └── clean2 │ │ └── Makefile ├── lossless │ └── Makefile ├── breakpoint │ ├── Makefile │ ├── breakpoint0 │ │ └── Makefile │ └── breakpoint2 │ │ └── Makefile ├── threading │ ├── Makefile │ ├── threading1 │ │ └── Makefile │ └── threading3 │ │ └── Makefile ├── subgraph_unitigs │ └── Makefile ├── coverage │ └── Makefile ├── largeK │ └── Makefile ├── graphviz │ └── Makefile ├── run.sh ├── path_check │ └── Makefile ├── unitigs │ └── Makefile └── sort │ └── Makefile ├── scripts ├── bash │ ├── links-count.sh │ ├── links-bytes.sh │ ├── links-median-threshold.sh │ ├── mccortex-to-ray.sh │ └── vcf-longest-haplotype.sh ├── build │ ├── multik-build.sh │ ├── update.sh │ └── mccortex ├── cortex_print_flanks.sh ├── R │ ├── install-deps.R │ ├── plot-length-hist.R │ └── plot-link-dist-cov.R ├── analysis │ ├── mummer2vcf.sh │ ├── sam-count-vars.pl │ └── mapping-vars-test.sh ├── perl │ ├── LineReader.pm │ ├── mccortex-header.pl │ ├── bubbles-example.pl │ ├── bubbles-to-contigs.pl │ └── breakpoints-example.pl ├── calculations │ ├── c-within-rand-a-b-bounds.pl │ └── bloom-filter-fpr.pl ├── report │ ├── make-kmer-plot.sh │ └── make-link-plot.sh ├── make-isec.sh └── seq2pdf.sh ├── results ├── data │ ├── download.sh │ 
├── ecoli │ │ ├── README.txt │ │ └── Makefile │ ├── PhiX │ │ └── about.txt │ └── chr22 │ │ ├── uniq_flanks │ │ ├── README.txt │ │ └── Makefile │ │ ├── about.txt │ │ └── Makefile ├── kmer_size_experiment │ ├── results │ │ ├── 20170206mon_chr22_28M │ │ │ ├── err-corr-plots │ │ │ │ └── Makefile │ │ │ ├── stoch.pdf │ │ │ ├── perfect.pdf │ │ │ ├── stocherr.pdf │ │ │ ├── plain-vs-pe.pdf │ │ │ ├── linkcounts.pe.pdf │ │ │ ├── linkcounts.se.pdf │ │ │ ├── pe-vs-sga-errs.pdf │ │ │ ├── pe-vs-sga-ng50.pdf │ │ │ ├── perfect_no_pe.pdf │ │ │ ├── plain-vs-links.pdf │ │ │ ├── stocherrcorr.pdf │ │ │ ├── plain-vs-pe-corr.pdf │ │ │ ├── corr-pe-vs-sga-errs.pdf │ │ │ ├── corr-pe-vs-sga-ng50.pdf │ │ │ ├── links-vs-sga-errs.pdf │ │ │ ├── links-vs-sga-ng50.pdf │ │ │ ├── plain-vs-links-corr.pdf │ │ │ ├── stoch.linkcounts.pe.pdf │ │ │ ├── stoch.linkcounts.se.pdf │ │ │ ├── perfect.linkcounts.pe.pdf │ │ │ ├── perfect.linkcounts.se.pdf │ │ │ ├── corr-links-vs-sga-errs.pdf │ │ │ ├── corr-links-vs-sga-ng50.pdf │ │ │ ├── corr-pe-vs-raw-sga-errs.pdf │ │ │ ├── corr-pe-vs-raw-sga-ng50.pdf │ │ │ ├── stoch.plain.csv │ │ │ ├── stocherr.linkcounts.pe.pdf │ │ │ ├── stocherr.linkcounts.se.pdf │ │ │ ├── corr-links-vs-raw-sga-errs.pdf │ │ │ ├── corr-links-vs-raw-sga-ng50.pdf │ │ │ ├── perfect.links.csv │ │ │ ├── perfect.pe.csv │ │ │ ├── perfect.plain.csv │ │ │ ├── stoch.links.csv │ │ │ ├── stocherr.pe.csv │ │ │ ├── stocherr.plain.csv │ │ │ ├── stocherrcorr.linkcounts.pe.pdf │ │ │ ├── stocherrcorr.linkcounts.se.pdf │ │ │ ├── stoch.pe.csv │ │ │ ├── stocherr.links.csv │ │ │ ├── stocherr.sga.csv │ │ │ ├── stocherrcorr.pe.csv │ │ │ ├── stocherrcorr.plain.csv │ │ │ ├── stocherrcorr.links.csv │ │ │ ├── stocherrcorr.sga.csv │ │ │ ├── perfect.linkcounts.se.csv │ │ │ ├── stoch.linkcounts.se.csv │ │ │ ├── stoch.linkcounts.pe.csv │ │ │ ├── stocherr.linkcounts.pe.csv │ │ │ ├── stocherr.linkcounts.se.csv │ │ │ ├── stocherrcorr.linkcounts.se.csv │ │ │ ├── perfect.linkcounts.pe.csv │ │ │ ├── stocherrcorr.linkcounts.pe.csv │ 
│ │ ├── bad.edges.csv │ │ │ ├── cleaning.corr.table.csv │ │ │ └── cleaning.table.csv │ │ ├── 20161012wed │ │ │ ├── stoch.pdf │ │ │ ├── perfect.pdf │ │ │ ├── stocherr.pdf │ │ │ ├── perfect_nope.pdf │ │ │ ├── stocherrcorr.pdf │ │ │ ├── perfect.plain.csv │ │ │ ├── stoch.plain.csv │ │ │ ├── stocherr.pe.csv │ │ │ ├── stocherr.plain.csv │ │ │ ├── perfect.links.csv │ │ │ ├── stoch.links.csv │ │ │ ├── stoch.pe.csv │ │ │ ├── stocherr.links.csv │ │ │ ├── stocherrcorr.plain.csv │ │ │ ├── perfect.pe.csv │ │ │ ├── stocherrcorr.links.csv │ │ │ ├── stocherrcorr.pe.csv │ │ │ ├── cleaning.corr.table.csv │ │ │ ├── cleaning.table.csv │ │ │ └── bad.edges.csv │ │ ├── 20160912mon │ │ │ ├── stoch.cov.pdf │ │ │ ├── perfect.cov.pdf │ │ │ ├── stocherr.cov.pdf │ │ │ ├── perfect.plain.csv │ │ │ ├── stoch.plain.csv │ │ │ ├── perfect.links.csv │ │ │ ├── stoch.links.csv │ │ │ ├── stocherr.plain.csv │ │ │ ├── stocherr.links.csv │ │ │ ├── notes.txt │ │ │ └── seqn.errors.csv │ │ ├── 20170211sat_chr22_28M_nomissing │ │ │ ├── README.md │ │ │ ├── stoch.pdf │ │ │ ├── perfect.pdf │ │ │ ├── stocherr.pdf │ │ │ ├── plain-vs-pe.pdf │ │ │ ├── pe-vs-sga-errs.pdf │ │ │ ├── pe-vs-sga-ng50.pdf │ │ │ ├── perfect_no_pe.pdf │ │ │ ├── plain-vs-links.pdf │ │ │ ├── stocherrcorr.pdf │ │ │ ├── plain-vs-pe-corr.pdf │ │ │ ├── corr-pe-vs-sga-errs.pdf │ │ │ ├── corr-pe-vs-sga-ng50.pdf │ │ │ ├── links-vs-sga-errs.pdf │ │ │ ├── links-vs-sga-ng50.pdf │ │ │ ├── plain-vs-links-corr.pdf │ │ │ ├── stoch.plain.csv │ │ │ ├── perfect.links.csv │ │ │ ├── perfect.pe.csv │ │ │ ├── perfect.plain.csv │ │ │ ├── stoch.links.csv │ │ │ ├── stocherr.plain.csv │ │ │ ├── corr-links-vs-sga-errs.pdf │ │ │ ├── corr-links-vs-sga-ng50.pdf │ │ │ ├── stoch.pe.csv │ │ │ ├── stocherr.links.csv │ │ │ ├── stocherr.pe.csv │ │ │ ├── stocherr.sga.csv │ │ │ ├── stocherrcorr.plain.csv │ │ │ ├── stocherrcorr.sga.csv │ │ │ ├── stocherrcorr.links.csv │ │ │ ├── stocherrcorr.pe.csv │ │ │ ├── bad.edges.csv │ │ │ ├── cleaning.corr.table.csv │ │ │ └── 
cleaning.table.csv │ │ ├── 20160929thurs │ │ │ ├── stoch.cov.pdf │ │ │ ├── perfect.cov.pdf │ │ │ ├── stocherr.cov.pdf │ │ │ ├── perfect_no_pe.pdf │ │ │ ├── stoch.plain.csv │ │ │ ├── stocherr.plain.csv │ │ │ ├── perfect.plain.csv │ │ │ ├── stoch.links.csv │ │ │ ├── stoch.pe.csv │ │ │ ├── stocherr.links.csv │ │ │ ├── stocherr.pe.csv │ │ │ ├── perfect.links.csv │ │ │ ├── perfect.pe.csv │ │ │ ├── cleaning.table.csv │ │ │ └── bad.edges.csv │ │ ├── 20170211sat_chr22_28M_confidstep │ │ │ ├── README.md │ │ │ ├── stoch.pdf │ │ │ ├── perfect.pdf │ │ │ ├── stocherr.pdf │ │ │ ├── perfect_no_pe.pdf │ │ │ ├── plain-vs-pe.pdf │ │ │ ├── stocherrcorr.pdf │ │ │ ├── pe-vs-sga-errs.pdf │ │ │ ├── pe-vs-sga-ng50.pdf │ │ │ ├── plain-vs-links.pdf │ │ │ ├── links-vs-sga-errs.pdf │ │ │ ├── links-vs-sga-ng50.pdf │ │ │ ├── plain-vs-pe-corr.pdf │ │ │ ├── stoch.plain.csv │ │ │ ├── corr-pe-vs-sga-errs.pdf │ │ │ ├── corr-pe-vs-sga-ng50.pdf │ │ │ ├── perfect.links.csv │ │ │ ├── perfect.plain.csv │ │ │ ├── plain-vs-links-corr.pdf │ │ │ ├── stoch.links.csv │ │ │ ├── stocherr.plain.csv │ │ │ ├── corr-links-vs-sga-errs.pdf │ │ │ ├── corr-links-vs-sga-ng50.pdf │ │ │ ├── perfect.pe.csv │ │ │ ├── stoch.pe.csv │ │ │ ├── stocherr.pe.csv │ │ │ ├── stocherrcorr.plain.csv │ │ │ ├── stocherr.links.csv │ │ │ ├── stocherr.sga.csv │ │ │ ├── stocherrcorr.sga.csv │ │ │ ├── stocherrcorr.links.csv │ │ │ ├── stocherrcorr.pe.csv │ │ │ ├── bad.edges.csv │ │ │ ├── cleaning.corr.table.csv │ │ │ └── cleaning.table.csv │ │ ├── make-csv.sh │ │ ├── count-links.pl │ │ └── plot-link-counts.R │ ├── notes.txt │ └── sga-all-kmers.sh ├── file_buffering │ ├── results20150413mon.mac.csv │ ├── results20150413mon.mac.txt │ ├── README.txt │ └── file-buffering.sh ├── klebsiella │ └── kleb_pneumoniae │ │ ├── indels │ │ └── about.txt │ │ ├── assembly │ │ └── get-max-covg.sh │ │ ├── freebayes │ │ ├── analysis.sh │ │ └── freebayes.sh │ │ ├── platypus │ │ ├── analysis.sh │ │ └── call-platypus.sh │ │ ├── cortex │ │ └── analysis.sh │ │ ├── 
mcrun │ │ └── analysis.sh │ │ └── large_events │ │ └── large-events-plot.R ├── benchmark │ ├── 10diploid10X │ │ ├── run-sim.sh │ │ └── smaller.fa │ ├── minidiploid │ │ └── run-sim.sh │ └── diploid60X │ │ └── run-sim.sh ├── hash_table_benchmark │ ├── results20150409thurs.mac.txt │ ├── stats.R │ └── results20150409thurs.linux.txt ├── correct │ └── hg_chr22 │ │ └── README.txt ├── README.txt ├── contig_confidence │ ├── Makefile │ └── confidence.tex ├── traversal_conjecture │ └── README.txt ├── var_calling_10ecoli │ ├── results │ │ ├── 20150510_sun_initrun │ │ │ └── 20150510.sun.txt.4 │ │ └── 20150615_joint_1by1_links_plain │ │ │ └── 20150617.wed.stats.txt │ └── about.txt └── var_calling_diploid_chr22_1Mbp │ └── about.txt ├── libs ├── misc │ ├── README.md │ ├── mem_size.h │ ├── Makefile │ └── jenkins.h ├── maximal_substrs │ └── Makefile └── cJSON │ ├── tests │ ├── test2 │ ├── test1 │ ├── test3 │ └── test5 │ └── LICENSE ├── .gitignore ├── AUTHORS ├── src ├── kmer │ ├── kmer_size.h │ └── kmer_size.c ├── basic │ ├── str_parsing.h │ ├── common_buffers.h │ ├── str_parsing.c │ ├── hash_mem.h │ ├── decomp_breakpoint.h │ ├── hash.h │ ├── decomp_bubble.h │ ├── range.h │ ├── chrom_pos_list.h │ ├── graph_info.h │ └── seq_loading_stats.h ├── tools │ ├── infer_edges.h │ ├── correct_reads.h │ ├── pop_bubbles.h │ ├── vcf_coverage.h │ └── generate_paths.h ├── graph │ ├── graph_format.h │ ├── graph_search.h │ ├── prune_nodes.h │ ├── graph_format.c │ ├── graph_step.c │ ├── contig_confidence.h │ └── db_unitig.h ├── global │ ├── cortex_types.h │ ├── global.c │ └── ctx_assert.c ├── paths │ └── gpath_follow.h └── alignment │ └── correct_aln_input.h ├── dev └── bkmer_revcmp │ └── Makefile ├── travis ├── script.sh ├── install.sh └── provision-vm.sh ├── LICENSE └── .travis.yml /tests/vcfcov/calls2/sample.fa: -------------------------------------------------------------------------------- 1 | >ref 2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGG 3 | 
-------------------------------------------------------------------------------- /scripts/bash/links-count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | 5 | zcat -fcd $1 | grep -c '^[FR] ' 6 | -------------------------------------------------------------------------------- /results/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -euo pipefail 2 | 3 | for d in chr22 ecoli 4 | do 5 | cd $d 6 | make 7 | cd .. 8 | done 9 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/err-corr-plots/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: 3 | pdflatex errcorr 4 | 5 | .PHONY: all -------------------------------------------------------------------------------- /scripts/build/multik-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | for k in 31 63 95 127 6 | do 7 | make MAXK=$k $@ 8 | done 9 | -------------------------------------------------------------------------------- /scripts/bash/links-bytes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | 5 | zcat -fcd $1 | grep '^[FR] ' | awk '{x=x+int(($2+3)/4)}END{print x}' 6 | -------------------------------------------------------------------------------- /results/data/ecoli/README.txt: -------------------------------------------------------------------------------- 1 | 2 | Ecoli K12 reference genome 3 | 4 | http://www.ncbi.nlm.nih.gov/nuccore/NC_000913.3 5 | 6 | Download with: `make` 7 | 8 | -------------------------------------------------------------------------------- /results/file_buffering/results20150413mon.mac.csv: 
-------------------------------------------------------------------------------- 1 | 0.312 2 | 0.315 3 | 0.314 4 | 0.327 5 | 0.328 6 | 0.096 7 | 0.097 8 | 0.097 9 | 0.098 10 | 0.102 11 | -------------------------------------------------------------------------------- /scripts/cortex_print_flanks.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -euo pipefail 3 | 4 | gzip -fcd $@ | awk -F '[ \t]' 'm{print $0;m=0;} /^>bubble\..*\.5pflank/{print $1; m=1;}' 5 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stoch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/stoch.pdf -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/indels/about.txt: -------------------------------------------------------------------------------- 1 | Isaac Turner 2 | 2015-12-21 3 | 4 | Plot indel distribution from freebayes, platypus, cortex, mccortex 5 | 6 | ./run.sh 7 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/stoch.cov.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160912mon/stoch.cov.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/perfect.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/perfect.pdf -------------------------------------------------------------------------------- 
/results/kmer_size_experiment/results/20161012wed/stocherr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/stocherr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/README.md: -------------------------------------------------------------------------------- 1 | 2 | human `chr22:28,000,000-28,999,999`, contigs assembled with `--no-missing-check` argument. 3 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/perfect.cov.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160912mon/perfect.cov.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stoch.cov.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/stoch.cov.pdf -------------------------------------------------------------------------------- /libs/misc/README.md: -------------------------------------------------------------------------------- 1 | Misc third party code used in McCortex 2 | 3 | CityHash is written in C++ so we pull in a re-written version in C from: 4 | https://github.com/nusov/cityhash-c 5 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/stocherr.cov.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160912mon/stocherr.cov.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/perfect.cov.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/perfect.cov.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stocherr.cov.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/stocherr.cov.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/perfect_nope.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/perfect_nope.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stocherrcorr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/stocherrcorr.pdf -------------------------------------------------------------------------------- /results/file_buffering/results20150413mon.mac.txt: -------------------------------------------------------------------------------- 1 | Reading: results20150413mon.csv 2 | Rows: 10 3 | 1 [1:5] mean: 0.3192 stddev: 0.007661593 4 | 2 [6:10] mean: 0.098 stddev: 0.002345208 5 | -------------------------------------------------------------------------------- 
/results/kmer_size_experiment/results/20160929thurs/perfect_no_pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/perfect_no_pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/README.md: -------------------------------------------------------------------------------- 1 | 2 | human `chr22:28,000,000-28,999,999`, contigs assembled with `--no-missing-check --confid-step 0.8` argument. 
3 | -------------------------------------------------------------------------------- /scripts/build/update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -oeu pipefail 4 | 5 | cd libs && make clean && cd .. 6 | make clean 7 | git pull 8 | git submodule update --init --recursive 9 | make 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.se.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.se.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-errs.pdf -------------------------------------------------------------------------------- 
/results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect_no_pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect_no_pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe-corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe-corr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.pdf -------------------------------------------------------------------------------- /libs/maximal_substrs/Makefile: -------------------------------------------------------------------------------- 1 | 2 | maxsubstrs: maxsubstrs.c stream_buffer.h 3 | $(CC) -Wall -Wextra -o $@ $< 4 | 5 | all: maxsubstrs 6 | 7 | clean: 8 | rm -rf maxsubstrs 9 | 10 | .PHONY: all clean 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-errs.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links-corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links-corr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.se.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.se.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.se.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.se.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-ng50.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,0 3 | 31,3161,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,16878,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.se.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.se.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect_no_pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect_no_pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect_no_pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect_no_pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/perfect.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,969,0 3 | 31,1729,0 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13480,0 7 | 71,21068,0 8 | 81,38623,0 9 | 91,48050,0 10 | 99,52592,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/stoch.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,969,0 3 | 31,1729,0 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13480,0 7 | 71,21068,0 8 | 81,38623,0 9 | 91,18068,0 10 | 99,108,1 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stoch.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,969,0 3 | 31,1729,0 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13480,0 7 | 71,21068,0 8 | 81,38623,0 9 | 91,16634,0 10 | 99,108,5 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stocherr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,961,55 3 | 
31,1729,19 4 | 41,2909,1 5 | 51,5501,0 6 | 61,13278,0 7 | 71,21068,0 8 | 81,11700,0 9 | 91,138,1 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/perfect.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,969,0 3 | 31,1729,0 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13480,0 7 | 71,21068,0 8 | 81,38623,0 9 | 91,48050,0 10 | 99,52592,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stoch.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,969,0 3 | 31,1729,0 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13480,0 7 | 71,21068,0 8 | 81,38623,0 9 | 91,16634,0 10 | 99,108,5 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stocherr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1343,127 3 | 31,2755,83 4 | 41,5821,53 5 | 51,14476,22 6 | 61,36204,7 7 | 71,48050,1 8 | 81,12589,0 9 | 91,138,1 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stocherr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,961,55 3 | 31,1729,19 4 | 41,2909,1 5 | 51,5501,0 6 | 61,13278,0 7 | 71,21068,0 8 | 81,11700,0 9 | 91,138,1 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-errs.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,94840,0 3 | 31,94840,0 4 | 41,94840,0 5 | 51,94840,0 6 | 61,94840,0 7 | 71,94840,0 8 | 81,94840,0 9 | 91,94840,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,22583,7 3 | 31,52417,9 4 | 41,76714,6 5 | 51,85267,8 6 | 61,125711,5 7 | 71,125711,2 8 | 81,125659,0 9 | 91,123940,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,0 3 | 31,3161,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,94391,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 
21,94690,4 3 | 31,94690,3 4 | 41,94391,2 5 | 51,94391,2 6 | 61,94391,0 7 | 71,94391,0 8 | 81,94391,0 9 | 91,17625,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2539,106 3 | 31,5982,59 4 | 41,13935,29 5 | 51,73223,19 6 | 61,59423,3 7 | 71,67617,10 8 | 81,14442,4 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1866,15 3 | 31,3126,6 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,60471,0 8 | 81,14272,0 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.pe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.pe.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.se.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.se.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-errs.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe-corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe-corr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/perfect.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,52592,1 3 | 31,52592,1 4 | 41,52592,1 5 | 51,52592,0 6 | 61,52592,0 7 | 71,52592,0 8 | 81,52592,0 9 | 91,52592,0 10 | 99,52592,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/stoch.links.csv: -------------------------------------------------------------------------------- 1 | 
K,NG50,AssemblyErrors 2 | 21,74568,6 3 | 31,74568,4 4 | 41,60396,3 5 | 51,52592,0 6 | 61,52592,0 7 | 71,52592,0 8 | 81,52592,0 9 | 91,18978,0 10 | 99,108,1 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/stocherr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,957,40 3 | 31,1729,1 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13480,0 7 | 71,21068,0 8 | 81,12431,0 9 | 91,137,3 10 | 99,108675,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/perfect.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,969,0 3 | 31,1729,0 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13480,0 7 | 71,21068,0 8 | 81,38623,0 9 | 91,48050,0 10 | 99,52592,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stoch.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,74568,5 3 | 31,74568,4 4 | 41,74568,3 5 | 51,52592,0 6 | 61,52592,0 7 | 71,52592,0 8 | 81,52592,0 9 | 91,17998,0 10 | 99,108,5 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stoch.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,42887,10 3 | 31,68825,5 4 | 41,93607,5 5 | 51,102363,3 6 | 61,164617,2 7 | 71,181787,2 8 | 81,228304,1 9 | 91,23945,0 10 | 99,108,5 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stocherr.links.csv: -------------------------------------------------------------------------------- 1 | 
K,NG50,AssemblyErrors 2 | 21,1552,135 3 | 31,2655,86 4 | 41,5732,71 5 | 51,10941,20 6 | 61,31864,5 7 | 71,31864,1 8 | 81,11749,0 9 | 91,138,1 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/stocherr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1343,127 3 | 31,2755,83 4 | 41,5821,53 5 | 51,14476,22 6 | 61,36204,7 7 | 71,48050,1 8 | 81,12589,0 9 | 91,138,1 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/perfect.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,52592,1 3 | 31,52592,1 4 | 41,52592,1 5 | 51,52592,0 6 | 61,52592,0 7 | 71,52592,0 8 | 81,52592,0 9 | 91,52592,0 10 | 99,52592,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stoch.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,74568,5 3 | 31,74568,4 4 | 41,74568,3 5 | 51,52592,0 6 | 61,52592,0 7 | 71,52592,0 8 | 81,52592,0 9 | 91,17998,0 10 | 99,108,5 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stoch.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,42887,10 3 | 31,68825,5 4 | 41,93607,5 5 | 51,102363,3 6 | 61,164617,2 7 | 71,181787,2 8 | 81,228304,1 9 | 91,23945,0 10 | 99,108,5 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stocherr.links.csv: -------------------------------------------------------------------------------- 1 | 
K,NG50,AssemblyErrors 2 | 21,1552,135 3 | 31,2655,86 4 | 41,5732,71 5 | 51,10941,20 6 | 61,31864,5 7 | 71,31864,1 8 | 81,11749,0 9 | 91,138,1 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stocherrcorr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,959,3 3 | 31,1763,0 4 | 41,2909,0 5 | 51,5501,0 6 | 61,13278,0 7 | 71,21068,0 8 | 81,38623,0 9 | 91,10786,5 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,24439,10 3 | 31,52419,18 4 | 41,76714,12 5 | 51,91821,18 6 | 61,125711,10 7 | 71,125711,4 8 | 81,124051,2 9 | 91,18437,1 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2609,101 3 | 31,5690,58 4 | 41,13546,39 5 | 51,33392,27 6 | 61,59423,10 7 | 71,67616,1 8 | 81,14272,1 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.sga.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2010,344 3 | 31,3232,357 4 | 41,4706,345 5 | 51,7206,226 6 | 61,12176,108 7 | 71,38131,21 8 | 81,93964,5 9 | 91,16165,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.pe.csv: -------------------------------------------------------------------------------- 1 | 
K,NG50,AssemblyErrors 2 | 21,2706,106 3 | 31,4690,65 4 | 41,12934,26 5 | 51,58441,15 6 | 61,74941,4 7 | 71,89246,8 8 | 81,120171,3 9 | 91,13454,6 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,11 3 | 31,3126,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,13382,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe-corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe-corr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.plain.csv: 
-------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,0 3 | 31,3161,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,16878,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-ng50.pdf -------------------------------------------------------------------------------- 
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links-corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links-corr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,0 3 | 31,3161,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,16878,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/stocherr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1335,89 3 | 31,3512,55 4 | 41,5173,70 5 | 51,11842,28 6 | 61,22376,8 7 | 71,31866,0 8 | 81,13029,0 9 | 91,137,3 10 | 99,108675,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/perfect.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,52592,1 3 | 31,52592,1 4 | 41,52592,1 5 | 51,52592,0 6 | 61,52592,0 7 | 71,52592,0 8 | 81,52592,0 9 | 91,52592,0 10 | 99,52592,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/perfect.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,42887,5 3 | 31,56913,4 4 | 41,68833,3 5 | 51,102361,3 6 | 61,164617,2 7 | 71,181787,2 8 | 81,181787,1 9 | 91,228304,0 10 | 99,228304,0 11 | 
-------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/perfect.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,42887,5 3 | 31,56913,4 4 | 41,68833,3 5 | 51,102361,3 6 | 61,164617,2 7 | 71,181787,2 8 | 81,181787,1 9 | 91,228304,0 10 | 99,228304,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stocherrcorr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1826,97 3 | 31,3025,85 4 | 41,5580,76 5 | 51,12459,26 6 | 61,31866,13 7 | 71,28039,1 8 | 81,42610,0 9 | 91,11077,5 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/stocherrcorr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1548,97 3 | 31,2653,68 4 | 41,6189,65 5 | 51,15642,18 6 | 61,34269,12 7 | 71,39181,0 8 | 81,50743,0 9 | 91,11697,5 10 | 99,21,0 11 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2849,98 3 | 31,4395,86 4 | 41,13780,41 5 | 51,41104,25 6 | 61,75074,12 7 | 71,75599,0 8 | 81,94391,0 9 | 91,13390,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.sga.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2010,736 3 | 31,3176,765 4 | 41,4032,735 5 | 51,5845,357 6 | 61,10533,137 7 | 71,30368,24 8 | 81,93964,7 9 | 91,16165,4 
10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,94392,0 3 | 31,94392,0 4 | 41,94392,0 5 | 51,94392,0 6 | 61,94392,0 7 | 71,94392,0 8 | 81,94392,0 9 | 91,94392,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,0 3 | 31,3161,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,94391,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links-corr.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links-corr.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,94392,5 3 | 31,94392,4 4 | 41,94391,3 5 | 51,94391,2 6 | 61,94391,2 7 | 71,94391,1 8 | 81,94391,1 9 | 91,17625,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1866,15 3 | 31,3126,6 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,60471,0 8 | 81,14272,0 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,94840,0 3 | 31,94840,0 4 | 41,94840,0 5 | 51,94840,0 6 | 61,94840,0 7 | 71,94840,0 8 | 81,94840,0 9 | 91,94840,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,22583,8 3 | 31,52417,9 4 | 41,76714,6 5 | 51,85267,8 6 | 61,125711,8 7 | 71,125711,7 8 | 81,125659,2 9 | 91,123940,1 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.plain.csv: 
-------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,0 3 | 31,3161,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,94391,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,94690,4 3 | 31,94690,3 4 | 41,94391,2 5 | 51,94391,2 6 | 61,94391,2 7 | 71,94391,1 8 | 81,94391,1 9 | 91,17625,0 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1866,15 3 | 31,3126,6 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,60471,0 8 | 81,14272,0 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-errs.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-ng50.pdf -------------------------------------------------------------------------------- 
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,22583,7 3 | 31,52417,9 4 | 41,76714,6 5 | 51,85267,8 6 | 61,125711,8 7 | 71,125711,7 8 | 81,125659,2 9 | 91,123940,1 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,24439,8 3 | 31,50982,19 4 | 41,76714,12 5 | 51,91821,18 6 | 61,125711,16 7 | 71,125711,14 8 | 81,125659,5 9 | 91,19076,3 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,6155,763 3 | 31,7638,540 4 | 41,25027,225 5 | 51,74994,77 6 | 61,75074,52 7 | 71,72574,17 8 | 81,14494,10 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,11 3 | 31,3126,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,13382,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-errs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-errs.pdf 
-------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-ng50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-ng50.pdf -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,24439,10 3 | 31,52419,14 4 | 41,76714,12 5 | 51,91821,18 6 | 61,125711,16 7 | 71,125711,14 8 | 81,125659,5 9 | 91,19076,3 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,5676,710 3 | 31,7289,570 4 | 41,18591,182 5 | 51,44614,74 6 | 61,67584,40 7 | 71,75599,14 8 | 81,14421,9 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,6248,745 3 | 31,8251,536 4 | 41,25027,225 5 | 51,74994,76 6 | 61,75074,48 7 | 71,72574,17 8 | 81,14494,10 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.sga.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2010,344 3 | 31,3232,357 4 | 41,4706,345 5 | 51,7206,226 6 | 61,12176,108 7 | 
71,38131,21 8 | 81,93964,5 9 | 91,16165,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.plain.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,1920,11 3 | 31,3126,0 4 | 41,5928,0 5 | 51,12729,0 6 | 61,27317,0 7 | 71,71586,0 8 | 81,75608,0 9 | 91,13382,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,5555,737 3 | 31,6687,548 4 | 41,18591,182 5 | 51,44614,73 6 | 61,67584,41 7 | 71,75599,14 8 | 81,14421,9 9 | 91,139,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.sga.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2010,344 3 | 31,3232,357 4 | 41,4706,345 5 | 51,7206,226 6 | 61,12176,108 7 | 71,38131,21 8 | 81,93964,5 9 | 91,16165,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.sga.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2010,736 3 | 31,3176,765 4 | 41,4032,735 5 | 51,5845,357 6 | 61,10533,137 7 | 71,30368,24 8 | 81,93964,7 9 | 91,16165,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.sga.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,2010,736 3 | 31,3176,765 4 | 
41,4032,735 5 | 51,5845,357 6 | 61,10533,137 7 | 71,30368,24 8 | 81,93964,7 9 | 91,16165,4 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,6630,558 3 | 31,6976,586 4 | 41,27260,147 5 | 51,50329,84 6 | 61,74915,45 7 | 71,93763,16 8 | 81,94391,5 9 | 91,13390,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,7439,560 3 | 31,8287,518 4 | 41,30490,168 5 | 51,75065,90 6 | 61,75501,44 7 | 71,94643,17 8 | 81,100768,18 9 | 91,13644,13 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.links.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,4205,570 3 | 31,6976,549 4 | 41,18580,158 5 | 51,50329,84 6 | 61,74915,45 7 | 71,93763,16 8 | 81,94391,5 9 | 91,13390,5 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.pe.csv: -------------------------------------------------------------------------------- 1 | K,NG50,AssemblyErrors 2 | 21,4136,542 3 | 31,8355,524 4 | 41,22086,206 5 | 51,75065,97 6 | 61,75501,44 7 | 71,94643,17 8 | 81,100768,18 9 | 91,13644,13 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /build/ 3 | /libs 4 | /docs 5 | /src/global/version.h 6 | 
/tests 7 | /results 8 | /benchmark 9 | /dev 10 | *.RData 11 | *.Rhistory 12 | *.DS_Store 13 | *.pyc 14 | *.vagrant 15 | 16 | /commit.txt 17 | /notes.txt 18 | /tests.txt 19 | -------------------------------------------------------------------------------- /results/data/PhiX/about.txt: -------------------------------------------------------------------------------- 1 | Illumina PhiX data 2 | 3 | PhiX generated from: http://blog.basespace.illumina.com/2012/10/02/new-miseq-datasets/ 4 | 5 | zcat /data/illumina/PhiX_S1_L001_R1_001.fastq.gz | \ 6 | head -4000000 | tail -400000 | gzip -c > PhiX.100K.1.fq.gz 7 | -------------------------------------------------------------------------------- /tests/bubbles/bubbles5/truth.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##fileDate=20151111 3 | ##reference=ref/ref.fa 4 | ##contig= 5 | ##FORMAT= 6 | #CHROM POS ID REF ALT QUAL FILTER INFO GT MrBond 7 | ref 49 . T A . PASS . GT 0/1 8 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Isaac Turner (turner.isaac@gmail.com) 2 | PhD Supervisor: Prof Gil McVean 3 | 4 | cortex_var: 5 | Zam Iqbal (zam@well.ox.ac.uk) 6 | Mario Caccamo (mario.caccamo@bbsrc.ac.uk) 7 | 8 | Bug reports, profiling, feedback, documentation etc.: 9 | Kiran Garimella 10 | Torsten Seemann 11 | Jerome Kelleher 12 | -------------------------------------------------------------------------------- /libs/cJSON/tests/test2: -------------------------------------------------------------------------------- 1 | {"menu": { 2 | "id": "file", 3 | "value": "File", 4 | "popup": { 5 | "menuitem": [ 6 | {"value": "New", "onclick": "CreateNewDoc()"}, 7 | {"value": "Open", "onclick": "OpenDoc()"}, 8 | {"value": "Close", "onclick": "CloseDoc()"} 9 | ] 10 | } 11 | }} 12 | 
-------------------------------------------------------------------------------- /tests/vcfcov/calls0/calls.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##fileDate=20151014 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##FORMAT= 7 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT MasterGrunt 8 | -------------------------------------------------------------------------------- /results/file_buffering/README.txt: -------------------------------------------------------------------------------- 1 | Record time to read a sequence file buffered vs un-buffered 2 | 3 | ./file-buffering.sh ../data/chr22/chr22.fa > results20150413mon.mac.csv 4 | ../hash_table_benchmark/stats.R results20150413mon.mac.csv > results20150413mon.mac.txt 5 | 6 | Mac files generated on my laptop (MacBook Pro) 7 | -------------------------------------------------------------------------------- /src/kmer/kmer_size.h: -------------------------------------------------------------------------------- 1 | #ifndef KMER_SIZE_H_ 2 | #define KMER_SIZE_H_ 3 | 4 | // Files that are not compiled with MIN_KMER_SIZE and MAX_KMER_SIZE link to 5 | // this object file and discover kmer size limits at run time 6 | 7 | int get_min_kmer_size(); 8 | int get_max_kmer_size(); 9 | 10 | #endif /* KMER_SIZE_H_ */ 11 | -------------------------------------------------------------------------------- /src/basic/str_parsing.h: -------------------------------------------------------------------------------- 1 | #ifndef STR_PARSING_H_ 2 | #define STR_PARSING_H_ 3 | 4 | #include "common_buffers.h" 5 | 6 | // Parse a comma separated list e.g. 
"12,3,12" 7 | // Returns <0 on error, otherwise number of chars used 8 | int comma_list_to_array(const char *str, SizeBuffer *nums); 9 | 10 | #endif /* STR_PARSING_H_ */ 11 | -------------------------------------------------------------------------------- /src/kmer/kmer_size.c: -------------------------------------------------------------------------------- 1 | #include "kmer_size.h" 2 | 3 | // Files that are not compiled with MIN_KMER_SIZE and MAX_KMER_SIZE link to 4 | // this object file and discover kmer size limits at run time 5 | 6 | int get_min_kmer_size() 7 | { 8 | return MIN_KMER_SIZE; 9 | } 10 | 11 | int get_max_kmer_size() 12 | { 13 | return MAX_KMER_SIZE; 14 | } 15 | -------------------------------------------------------------------------------- /dev/bkmer_revcmp/Makefile: -------------------------------------------------------------------------------- 1 | NWORDS=1 2 | SHELL:=/bin/bash 3 | 4 | all: revcmp 5 | 6 | clean: 7 | rm -rf revcmp 8 | 9 | revcmp: revcmp.c 10 | $(CC) -O4 -Wall -Wextra -DNUM_BKMER_WORDS=$(NWORDS) -o $@ $< 11 | 12 | profile: 13 | for i in {1..5}; do for m in {0..4}; do time ./revcmp -m $$m -n 1000000000; done; done 14 | 15 | .PHONY: all clean profile 16 | -------------------------------------------------------------------------------- /tests/pjoin/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Each test case is in a separate sub-directory 4 | 5 | # pjoin0: 6 | # pjoin1: 7 | 8 | all: 9 | cd pjoin0 && $(MAKE) 10 | cd pjoin1 && $(MAKE) 11 | @echo "All looks good." 
12 | 13 | clean: 14 | cd pjoin0 && $(MAKE) clean 15 | cd pjoin1 && $(MAKE) clean 16 | 17 | .PHONY: all clean 18 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/make-csv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eou pipefail 3 | 4 | echo "K,NG50,AssemblyErrors" 5 | for f in $@ 6 | do 7 | K=`echo $f | grep -oE 'k[0-9]+' | grep -oE -m1 '[0-9]+$'` 8 | NG50=`grep 'NG50:' $f | grep -oE '[0-9]+$'` 9 | ERRORS=`grep 'assembly_errors:' $f | grep -oE '[0-9]+$'` 10 | echo "$K,$NG50,$ERRORS" 11 | done 12 | -------------------------------------------------------------------------------- /tests/vcfcov/ref/ref.fa: -------------------------------------------------------------------------------- 1 | >chr0 2 | ATATTGATCCCC 3 | >ref 4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 5 | >chr1 6 | TGGGCCAGTACGGTGAATCCCTGATGATCCGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG 7 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.se.csv: -------------------------------------------------------------------------------- 1 | K n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 943524 14735 498094 1.7MB 3 | 31 973199 6547 63108 114.8KB 4 | 41 985939 2981 11506 13.5KB 5 | 51 991526 1325 3306 3.4KB 6 | 61 994205 654 1324 1.3KB 7 | 71 995708 349 554 554B 8 | 81 996639 159 228 228B 9 | 91 997273 62 74 74B 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.se.csv: -------------------------------------------------------------------------------- 1 | K 
n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 943521 14735 407180 1.3MB 3 | 31 973196 6547 56366 100.6KB 4 | 41 985936 2980 10845 12.6KB 5 | 51 991523 1321 3188 3.3KB 6 | 61 994202 644 1284 1.3KB 7 | 71 995705 338 526 526B 8 | 81 996636 145 206 206B 9 | 91 997225 49 58 58B 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.pe.csv: -------------------------------------------------------------------------------- 1 | K n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 943521 14739 985984 6.3MB 3 | 31 973196 6553 132965 385.8KB 4 | 41 985936 3010 26048 46KB 5 | 51 991523 1385 7719 10.8KB 6 | 61 994202 734 2760 3.3KB 7 | 71 995705 441 1130 1.2KB 8 | 81 996636 271 499 513B 9 | 91 997225 173 238 238B 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.pe.csv: -------------------------------------------------------------------------------- 1 | K n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 944420 15846 521385 1.8MB 3 | 31 973255 6824 82265 168.1KB 4 | 41 985901 3030 14754 18.8KB 5 | 51 991494 1365 4509 4.9KB 6 | 61 994174 713 1857 1.9KB 7 | 71 995658 419 830 833B 8 | 81 995784 253 407 408B 9 | 91 409278 18 26 26B 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.se.csv: -------------------------------------------------------------------------------- 1 | K n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 944420 15814 510453 1.7MB 3 | 31 973255 6789 67188 123.2KB 4 | 41 985901 3006 11976 14.1KB 5 | 51 991494 1331 3386 3.5KB 6 | 61 994174 647 1326 1.3KB 7 | 71 995658 343 543 543B 8 | 81 995784 150 215 215B 9 | 91 409278 18 26 26B 10 | 
-------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.se.csv: -------------------------------------------------------------------------------- 1 | K n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 943614 15479 506506 1.7MB 3 | 31 973203 6767 68039 124.5KB 4 | 41 985939 3014 12391 14.6KB 5 | 51 991526 1335 3554 3.7KB 6 | 61 994205 651 1408 1.4KB 7 | 71 995708 346 571 571B 8 | 81 996638 155 227 227B 9 | 91 994901 47 58 58B 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.pe.csv: -------------------------------------------------------------------------------- 1 | K n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 943524 14739 1411791 9.9MB 3 | 31 973199 6553 161398 490.4KB 4 | 41 985939 3011 28853 51.7KB 5 | 51 991526 1387 8211 11.6KB 6 | 61 994205 739 2952 3.5KB 7 | 71 995708 447 1199 1.2KB 8 | 81 996639 278 537 556B 9 | 91 997273 184 273 273B 10 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.pe.csv: -------------------------------------------------------------------------------- 1 | K n_graph_kmers n_link_kmers n_links link_junction_mem 2 | 21 943614 15502 523847 1.8MB 3 | 31 973203 6792 72634 137KB 4 | 41 985939 3036 15252 19.6KB 5 | 51 991526 1375 5044 5.7KB 6 | 61 994205 718 2037 2.1KB 7 | 71 995708 423 849 851B 8 | 81 996638 257 420 420B 9 | 91 994901 152 201 201B 10 | -------------------------------------------------------------------------------- /tests/pop_bubbles/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Test bubble popping 4 | # Each test case is in a separate sub-directory 5 | 6 | all: 7 | cd 
pop_bubbles1 && $(MAKE) 8 | cd pop_bubbles2 && $(MAKE) 9 | @echo "pop_bubbles: All looks good." 10 | 11 | clean: 12 | cd pop_bubbles1 && $(MAKE) clean 13 | cd pop_bubbles2 && $(MAKE) clean 14 | 15 | .PHONY: all clean 16 | -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/assembly/get-max-covg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # usage: get-max-covg.sh 4 | # output: 5 | 6 | genome=$1 7 | seqfile=$2 8 | 9 | dnacat -L "$seqfile" | cut -f2 | sort -rn | \ 10 | awk '{if(x+$1>'$genome'){exit;} x+=$1; n+=1; l=$1; if(!f){f=$1}} END{print n,x,f,l}' 11 | -------------------------------------------------------------------------------- /results/data/ecoli/Makefile: -------------------------------------------------------------------------------- 1 | 2 | SHELL:=/bin/bash -euo pipefail 3 | 4 | CTXDIR=../../../ 5 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 6 | 7 | all: ecoli.fa 8 | 9 | NC_000913.fna: 10 | curl -o NC_000913.fna 'https://www.ncbi.nlm.nih.gov/nuccore/U00096.3?report=fasta&log$=seqview&format=text' 11 | 12 | ecoli.fa: NC_000913.fna 13 | $(DNACAT) -M <(echo EColiK12) $< > $@ 14 | 15 | .PHONY: all 16 | -------------------------------------------------------------------------------- /results/data/chr22/uniq_flanks/README.txt: -------------------------------------------------------------------------------- 1 | Isaac Turner 2 | 2014 Sep 24 3 | 4 | chr22.1Mbp.uniq.fa was generated by taking hg19 chr22:17,000,000-17,999,999 and 5 | adding unique kmers (k=63) to the start and end. 
6 | 7 | Unique kmers added were: 8 | GGTCGCACACAAATACTACGGGCATTGGATGCGACCATAAGTCTTGACAGGCTTTGTTCCCT 9 | ACAACTTAACCTGGAACTAGAACTAATTTATGAGCGAGCCAGAACAGGTAGTCTGAGGGAGT 10 | 11 | Regenerate with: `make` 12 | -------------------------------------------------------------------------------- /results/benchmark/10diploid10X/run-sim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Ten diploid 1Kb genomes 20X covg 4 | # 5 | # To run: 6 | # ./run-sim.sh 7 | # 8 | # To clear up: 9 | # ./run-sim.sh clean 10 | # 11 | 12 | make -f ../calling-comparison.mk \ 13 | SEQ=smaller.fa NUM_INDIVS=10 PLOIDY=2 KMER=31 \ 14 | SNPS=10 INDELS=10 INV=10 INVLEN=10 \ 15 | READLEN=100 MPSIZE=250 ALLELECOVG=10 \ 16 | MEMWIDTH=20 MEMHEIGHT=15 $@ 17 | -------------------------------------------------------------------------------- /results/hash_table_benchmark/results20150409thurs.mac.txt: -------------------------------------------------------------------------------- 1 | Reading: results20150409thurs.mac.csv 2 | Rows: 35 3 | 1 [1:5] mean: 57.9732 stddev: 2.082858 4 | 2 [6:10] mean: 29.659 stddev: 3.398727 5 | 3 [11:15] mean: 27.3986 stddev: 0.5796527 6 | 4 [16:20] mean: 34.1938 stddev: 14.46233 7 | 5 [21:25] mean: 40.0266 stddev: 0.5128282 8 | 6 [26:30] mean: 24.392 stddev: 0.74178 9 | 7 [31:35] mean: 25.5838 stddev: 1.673209 10 | -------------------------------------------------------------------------------- /libs/misc/mem_size.h: -------------------------------------------------------------------------------- 1 | #ifndef MEM_SIZE_H_ 2 | #define MEM_SIZE_H_ 3 | 4 | /** 5 | * Returns the size of physical memory (RAM) in bytes. 
6 | * Author: David Robert Nadeau 7 | * Site: http://NadeauSoftware.com/ 8 | * License: Creative Commons Attribution 3.0 Unported License 9 | * http://creativecommons.org/licenses/by/3.0/deed.en_US 10 | */ 11 | size_t getMemorySize(); 12 | 13 | #endif /* MEM_SIZE_H_ */ 14 | -------------------------------------------------------------------------------- /tests/build/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Each test case is in a separate sub-directory 4 | 5 | # build0: random sequence, sort graph, reassemble sequence 6 | # build1: test --intersection and --graph arguments 7 | 8 | all: 9 | cd build0 && $(MAKE) 10 | cd build1 && $(MAKE) 11 | @echo "All looks good." 12 | 13 | clean: 14 | cd build0 && $(MAKE) clean 15 | cd build1 && $(MAKE) clean 16 | 17 | .PHONY: all clean 18 | -------------------------------------------------------------------------------- /scripts/R/install-deps.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript --vanilla 2 | 3 | # 4 | # Install all R packages required by McCortex R scripts 5 | # 6 | 7 | getpkg <- function(pkg) { 8 | if(!require(pkg, character.only=TRUE)) { 9 | install.packages(pkg, dep=TRUE, repos='http://cran.rstudio.com/') 10 | } 11 | } 12 | 13 | getpkg('ggplot2') 14 | getpkg('gridExtra') 15 | getpkg('reshape') 16 | getpkg('scales') 17 | getpkg('plyr') 18 | getpkg('cowplot') 19 | -------------------------------------------------------------------------------- /tests/vcfcov/calls3/hardy.fa: -------------------------------------------------------------------------------- 1 | >hardy1 2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 3 | >hardy2 4 | 
ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 5 | -------------------------------------------------------------------------------- /results/correct/hg_chr22/README.txt: -------------------------------------------------------------------------------- 1 | Isaac Turner 2 | 2014-09-25 3 | 4 | Using 1Mb of chr22 and empirical PhiX Illumina reads to simulate and measure 5 | the power and error rate of read correction using a de Bruijn graph 6 | 7 | Requires mccortex/results/data directory 8 | 9 | To reproduce: 10 | 11 | cd libs && make core common && cd .. 12 | make MAXK=31 13 | cd results/correct 14 | make 15 | 16 | Runtime on my macbook is ~10 minutes 17 | -------------------------------------------------------------------------------- /results/README.txt: -------------------------------------------------------------------------------- 1 | Data and code to reproduce experiments 2 | 3 | In order to run any of these experiments, you must run the following commands to 4 | compile McCortex and download the required data: 5 | 6 | cd .. 7 | for k in 31 63 95 127; do make MAXK=$k; done 8 | cd results/data 9 | ./download.sh 10 | 11 | You must also fetch and download the PhiX data yourself from Illumina's 12 | basespace.
Details are in results/data/PhiX/about.txt 13 | -------------------------------------------------------------------------------- /results/contig_confidence/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | all: confidence_plot.pdf confidence.pdf 4 | 5 | confidence_plot.pdf: plot-confidence.R 6 | R --vanilla -f plot-confidence.R --args $@ 7 | 8 | confidence.pdf: confidence.tex confidence_plot.pdf .force 9 | pdflatex confidence 10 | pdflatex confidence 11 | 12 | clean: 13 | rm -rf confidence_plot.pdf confidence.{pdf,aux,log} 14 | 15 | .force: 16 | 17 | .PHONY: all clean .force 18 | -------------------------------------------------------------------------------- /tests/vcfcov/calls3/laurel.fa: -------------------------------------------------------------------------------- 1 | >laurel1 2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 3 | >laurel2 85G>A 4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCaTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 5 | -------------------------------------------------------------------------------- /results/benchmark/minidiploid/run-sim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Single small diploid 1Kb 20X covg 4 | # 5 | # To run: 6 | # ./run-sim.sh 7 | # 8 | # To clear up: 9 | # ./run-sim.sh clean 10 | # 11 | 12 | { echo '>rnd'; seqrnd 1000; } | facat > rnd.fa 13 | 14 | make -f ../calling-comparison.mk \ 15 | SEQ=rnd.fa NUM_INDIVS=1 PLOIDY=2 KMER=31 \ 16 | SNPS=0 INDELS=100 INV=0 INVLEN=10 \ 17 | READLEN=100 MPSIZE=250 ALLELECOVG=10 \ 18 | MEMWIDTH=20 MEMHEIGHT=15 $@ 19 | 
-------------------------------------------------------------------------------- /tests/clean_graph/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Test graph cleaning 4 | # Each test case is in a separate sub-directory 5 | 6 | all: 7 | cd clean1 && $(MAKE) 8 | cd clean2 && $(MAKE) 9 | cd clean3 && $(MAKE) 10 | cd clean4 && $(MAKE) 11 | @echo "clean_graph: All looks good." 12 | 13 | clean: 14 | cd clean1 && $(MAKE) clean 15 | cd clean2 && $(MAKE) clean 16 | cd clean3 && $(MAKE) clean 17 | cd clean4 && $(MAKE) clean 18 | 19 | .PHONY: all clean 20 | -------------------------------------------------------------------------------- /travis/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "Branch: ${TRAVIS_BRANCH}" 6 | echo "OS: ${TRAVIS_OS_NAME}" 7 | echo "CC: ${CC}" 8 | echo "Perl: ${TRAVIS_PERL_VERSION}" 9 | 10 | # The COVERITY_SCAN_BRANCH environment variable will be set to 1 when the 11 | # Coverity Scan addon is in operation 12 | # Only run if we are not doing Coverity Scan analysis 13 | if [ "${COVERITY_SCAN_BRANCH}" != 1 ] 14 | then 15 | # Build and run all tests 16 | cd tests && ./run.sh 17 | fi 18 | -------------------------------------------------------------------------------- /results/hash_table_benchmark/stats.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript --vanilla 2 | 3 | args <- commandArgs(trailingOnly = TRUE) 4 | 5 | if(length(args) != 1) { 6 | stop("Usage: stats.R "); 7 | } 8 | 9 | cat('Reading:',args[1],'\n'); 10 | 11 | x=read.csv(file=args[1],as.is=T,header=F) 12 | 13 | cat('Rows:',nrow(x),'\n'); 14 | 15 | i=1 16 | j=1 17 | 18 | while(i <= nrow(x)) { 19 | cat(j,' [',i,':',i+4,'] mean: ',mean(x[i:(i+4),1]),' stddev: ',sd(x[i:(i+4),1]),'\n',sep=''); 20 | i=i+5; j=j+1; 21 | } 22 | 
-------------------------------------------------------------------------------- /src/basic/common_buffers.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_BUFFERS_H_ 2 | #define COMMON_BUFFERS_H_ 3 | 4 | #include "madcrowlib/madcrow_buffer.h" 5 | 6 | madcrow_buffer(char_ptr_buf, CharPtrBuffer, char*); 7 | madcrow_buffer(byte_buf, ByteBuffer, uint8_t); 8 | madcrow_buffer(uint32_buf, Uint32Buffer, uint32_t); 9 | madcrow_buffer(int32_buf, Int32Buffer, int32_t); 10 | madcrow_buffer(size_buf, SizeBuffer, size_t); 11 | madcrow_buffer_wipe(zsize_buf, ZeroSizeBuffer, size_t); 12 | 13 | #endif /* COMMON_BUFFERS_H_ */ 14 | -------------------------------------------------------------------------------- /results/traversal_conjecture/README.txt: -------------------------------------------------------------------------------- 1 | Isaac Turner 2 | 2014 Sep 24 3 | 4 | In order to re-run the experiment, from the mccortex directory run: 5 | 6 | # Fetch libraries needed 7 | cd libs && make core common && cd .. 8 | # Compile McCortex 9 | make MAXK=31 10 | # Generate the reference from chr22 11 | cd results/data/chr22/uniq_flanks && make && cd ../../../.. 12 | # Run the experiment 13 | cd results/exp_abc/1MbpHg19 14 | make 15 | 16 | That's it!
17 | 18 | On my macbook run time is 40 mins 19 | -------------------------------------------------------------------------------- /results/data/chr22/about.txt: -------------------------------------------------------------------------------- 1 | Isaac Turner 2 | 2014-10-01 3 | 4 | 1Mbp of chr22 from hg19 (GRCh37) 5 | 6 | To generate: 7 | 8 | wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/chr22.fa.gz 9 | gzip -d chr22.fa.gz 10 | samtools faidx chr22.fa chr22:17000000-17999999 | tr -d 'n' | tr -d 'N' > chr22_17M_18M.fa 11 | 12 | Check it is 1Mbp still 13 | 14 | dnacat -s chr22_17M_18M.fa 15 | 16 | Index with BWA and samtools 17 | 18 | bwa index chr22_17M_18M.fa 19 | samtools faidx chr22_17M_18M.fa 20 | 21 | -------------------------------------------------------------------------------- /tests/lossless/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Each test case is in a separate sub-directory 4 | 5 | # We show that a sequence can be losslessly regenerated from its linked dBG 6 | # lossless0: random sequence in a single read is regenerated 7 | # lossless1: minimum required read length to reproduce exact contig 8 | 9 | all: 10 | cd lossless0 && $(MAKE) 11 | cd lossless1 && $(MAKE) 12 | @echo "All looks good." 
12 | 13 | clean: 14 | cd lossless0 && $(MAKE) clean 15 | cd lossless1 && $(MAKE) clean 16 | 17 | .PHONY: all clean 18 | -------------------------------------------------------------------------------- /libs/misc/Makefile: -------------------------------------------------------------------------------- 1 | # These files are all compiled by the main Makefile, 2 | # this is just here as documentation 3 | 4 | PLATFORM := $(shell uname) 5 | COMPILER := $(shell ($(CC) -v 2>&1) | tr A-Z a-z ) 6 | 7 | # clang Link Time Optimisation (lto) seems to have issues atm 8 | ifneq (,$(findstring clang,$(COMPILER))) 9 | OPT = -O3 10 | else 11 | OPT = -O4 12 | endif 13 | 14 | CFLAGS=-Wall -Wextra $(OPT) 15 | 16 | all: city.o mem_size.o 17 | 18 | %.o: %.c %.h 19 | $(CC) $(CFLAGS) -c $*.c -o $*.o 20 | 21 | clean: 22 | rm -rf *.o 23 | 24 | .PHONY: all clean 25 | -------------------------------------------------------------------------------- /tests/breakpoint/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Each test case is in a separate sub-directory 4 | 5 | # breakpoint0: empty breakpoint calls (K=31) 6 | # breakpoint1: test we get the correct VCF (K=11) 7 | # breakpoint2: check we don't call any ref bubbles 8 | 9 | all: 10 | cd breakpoint0 && $(MAKE) 11 | cd breakpoint1 && $(MAKE) 12 | cd breakpoint2 && $(MAKE) 13 | @echo "All looks good."
14 | 15 | clean: 16 | cd breakpoint0 && $(MAKE) clean 17 | cd breakpoint1 && $(MAKE) clean 18 | cd breakpoint2 && $(MAKE) clean 19 | 20 | .PHONY: all clean 21 | -------------------------------------------------------------------------------- /tests/bubbles/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Test bubble calling and decomposing to VCF 4 | # Each test case is in a separate sub-directory 5 | 6 | all: 7 | cd bubbles1 && $(MAKE) 8 | cd bubbles2 && $(MAKE) 9 | cd bubbles3 && $(MAKE) 10 | cd bubbles4 && $(MAKE) 11 | cd bubbles5 && $(MAKE) 12 | @echo "bubbles: All looks good." 13 | 14 | clean: 15 | cd bubbles1 && $(MAKE) clean 16 | cd bubbles2 && $(MAKE) clean 17 | cd bubbles3 && $(MAKE) clean 18 | cd bubbles4 && $(MAKE) clean 19 | cd bubbles5 && $(MAKE) clean 20 | 21 | .PHONY: all clean 22 | -------------------------------------------------------------------------------- /scripts/analysis/mummer2vcf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | 5 | snpfile=$1 6 | reffile=$2 7 | 8 | echo '##fileformat=VCFv4.1' 9 | echo '##fileDate='`date '+%Y%m%d'` 10 | echo "##reference=$reffile" 11 | ~/c/dnacat/bin/dnacat -L $reffile | awk '{OFS=""; print "##chrom="}' 12 | echo '##FORMAT=' 13 | echo | awk '{OFS="\t"; print "#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT"}' 14 | 15 | grep -E '^\s*[0-9]' $snpfile | awk '{OFS="\t"; print $14,$1,".",$2,$3,".",".",".","GT"}' 16 | -------------------------------------------------------------------------------- /tests/vcfcov/calls4/calls.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##fileDate=20151014 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##FORMAT= 7 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 8 | ref 7 . G T . PASS . . 9 | ref 28 . A C . PASS . . 
At k=21, ~0.27%[1] of mutations add a new edge between existing kmers.
With an error rate of 0.5% and 100X coverage, there are 500,000 sequencing errors[2].
Therefore we add approximately 1325 new edges[3] to the graph with errors, even
if we clean off all kmers due to sequencing errors. This equates to roughly one
erroneous edge every 750bp of the 1Mbp reference.


[1] 7951 / (3*10**6) = 0.265%, see seqn.errors.csv
[2] 100*10**6*0.005, 100X coverage, ref is 1Mbp, 0.5% sequencing error rate
[3] 500000 * 7951 / (3*10**6) = 1325; 500,000 errors, ~0.27% are likely to be edges between existing kmers
-------------------------------------------------------------------------------- 1 | make -f task.k21.mk CTXDIR=../.. MEM=2G breakpointvcf 2 | make[1]: Entering directory `/data1/users/turner/cortex_sims/ninja-cortex/results/bubble_calling_10ecoli' 3 | make[1]: Nothing to be done for `breakpointvcf'. 4 | make[1]: Leaving directory `/data1/users/turner/cortex_sims/ninja-cortex/results/bubble_calling_10ecoli' 5 | ../../libs/bcftools/bcftools isec truth.k21.norm.vcf.gz proj/vcfs/breakpoints.k21.vcf.gz -p truthisec 6 | McCortex-brkpt Missed: 1557 / 45789 ( 3.40%) 7 | McCortex-brkpt FP: 238 / 44470 ( 0.54%) 8 | McCortex-brkpt Found: 44232 / 45789 (96.60%) 9 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/cleaning.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal 4 | k21,912362,8064108,912361,916474,912340 5 | k31,962865,10437776,962864,963212,962853 6 | k41,984331,11684532,984330,984311,984310 7 | k51,992751,11939641,992749,992711,992711 8 | k61,996024,11301966,996022,996009,996009 9 | k71,997500,9841194,997498,997436,997436 10 | k81,998290,7606657,998286,997359,997359 11 | k91,998785,4640344,996949,406297,406277 12 | k99,999043,1484663,703944,2269,2266 13 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/cleaning.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal 4 | k21,912362,8064108,912361,916474,912340 5 | k31,962865,10437776,962864,963212,962853 
#ifndef JENKINS_H_
#define JENKINS_H_

#include <stddef.h> // size_t
#include <stdint.h> // uint8_t, uint32_t

//
// Jenkins one-at-a-time hash, split into its per-byte mixing step and its
// final avalanche step so callers can hash data incrementally.
//

// Mix a single byte `x` into the running hash state `h`; returns the new state.
// 5 ops per byte
static inline uint32_t jenkins_mix(uint32_t h, uint8_t x) {
  h += x;
  h += (h<<10);
  h ^= (h>>6);
  return h;
}

// Final scramble applied once after every byte has been mixed in.
static inline uint32_t jenkins_finish(uint32_t h) {
  h += (h<<3);
  h ^= (h>>11);
  h += (h<<15);
  return h;
}

// Hash `len` bytes starting at `key`. A zero-length key hashes to 0.
// 5*bytes+6 ops [32bit => 26, 64 => 46]
static inline uint32_t jenkins_one_at_a_time_hash(const uint8_t *key, size_t len)
{
  uint32_t hash = 0;
  // The index must be as wide as `len`: a 32-bit counter wraps before reaching
  // a length above UINT32_MAX and the loop would never terminate.
  size_t i;
  for(i = 0; i < len; ++i) hash = jenkins_mix(hash, key[i]);
  return jenkins_finish(hash);
}

#endif /* JENKINS_H_ */
16 | 17 | clean: 18 | cd threading1 && $(MAKE) clean 19 | cd threading2 && $(MAKE) clean 20 | cd threading3 && $(MAKE) clean 21 | cd threading4 && $(MAKE) clean 22 | 23 | .PHONY: all clean 24 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160912mon/seqn.errors.csv: -------------------------------------------------------------------------------- 1 | # The number of sequencing errors that would add a new edge between two 2 | # existing kmers. Note: there are 3*reflen possible mutations 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges 4 | 21,1000000,927610,933930,7951,100,0.005,1325 5 | 31,1000000,971394,973934,1874,100,0.005,312 6 | 41,1000000,988492,989443,394,100,0.005,65 7 | 51,1000000,994492,994828,85,100,0.005,14 8 | 61,1000000,996793,996939,35,100,0.005,5 9 | 71,1000000,997897,997975,9,100,0.005,1 10 | 81,1000000,998506,998551,4,100,0.005,0 11 | 91,1000000,998891,998921,6,100,0.005,1 12 | 99,1000000,999092,999114,0,100,0.005,0 13 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20160929thurs/bad.edges.csv: -------------------------------------------------------------------------------- 1 | # The number of sequencing errors that would add a new edge between two 2 | # existing kmers. 
Note: there are 3*reflen possible mutations 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges 4 | 21,1000000,927610,933930,7951,100,0.005,1325 5 | 31,1000000,971394,973934,1874,100,0.005,312 6 | 41,1000000,988492,989443,394,100,0.005,65 7 | 51,1000000,994492,994828,85,100,0.005,14 8 | 61,1000000,996793,996939,35,100,0.005,5 9 | 71,1000000,997897,997975,9,100,0.005,1 10 | 81,1000000,998506,998551,4,100,0.005,0 11 | 91,1000000,998891,998921,6,100,0.005,1 12 | 99,1000000,999092,999114,0,100,0.005,0 13 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20161012wed/bad.edges.csv: -------------------------------------------------------------------------------- 1 | # The number of sequencing errors that would add a new edge between two 2 | # existing kmers. Note: there are 3*reflen possible mutations 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges 4 | 21,1000000,927610,933930,7951,100,0.005,1325 5 | 31,1000000,971394,973934,1874,100,0.005,312 6 | 41,1000000,988492,989443,394,100,0.005,65 7 | 51,1000000,994492,994828,85,100,0.005,14 8 | 61,1000000,996793,996939,35,100,0.005,5 9 | 71,1000000,997897,997975,9,100,0.005,1 10 | 81,1000000,998506,998551,4,100,0.005,0 11 | 91,1000000,998891,998921,6,100,0.005,1 12 | 99,1000000,999092,999114,0,100,0.005,0 13 | -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/freebayes/analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | set -o xtrace 5 | 6 | CTXDIR=../../../../ 7 | 8 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz 9 | MUMMER=../mummer/mummer.vcf.gz 10 | TRUTH=../truth/CAV1016.fa 11 | MAPPING_TEST=$CTXDIR/scripts/analysis/mapping-vars-test.sh 12 | MUMMER_ISEC=$CTXDIR/scripts/analysis/mummer-vcf-isec.sh 13 | 14 | vcf=freebayes.vcf.gz 15 | name=freebayes 16 | 17 | 
mkdir -p mapping_truth mummer_isec 18 | 19 | echo "Mapping test..." 20 | $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name 21 | echo "Mummer intersection..." 22 | $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log 23 | -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/platypus/analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | set -o xtrace 5 | 6 | CTXDIR=../../../../ 7 | 8 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz 9 | MUMMER=../mummer/mummer.vcf.gz 10 | TRUTH=../truth/CAV1016.fa 11 | MAPPING_TEST=$CTXDIR/scripts/analysis/mapping-vars-test.sh 12 | MUMMER_ISEC=$CTXDIR/scripts/analysis/mummer-vcf-isec.sh 13 | 14 | vcf=platypus.vcf.gz 15 | name=platypus 16 | 17 | mkdir -p mapping_truth mummer_isec 18 | 19 | echo "Mapping test..." 20 | $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name 21 | echo "Mummer intersection..." 22 | $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log 23 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/bad.edges.csv: -------------------------------------------------------------------------------- 1 | # The number of sequencing errors that would add a new edge between two 2 | # existing kmers. 
Note: there are 3*reflen possible mutations 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges 4 | 21,1000000,955450,959133,3627,100,0.005,604 5 | 31,1000000,980343,981744,838,100,0.005,139 6 | 41,1000000,990014,990588,183,100,0.005,30 7 | 51,1000000,993938,994179,44,100,0.005,7 8 | 61,1000000,995750,995879,15,100,0.005,2 9 | 71,1000000,996793,996870,7,100,0.005,1 10 | 81,1000000,997395,997442,2,100,0.005,0 11 | 91,1000000,997789,997822,5,100,0.005,0 12 | 99,1000000,998029,998055,4,100,0.005,0 13 | -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/cortex/analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | set -o xtrace 5 | 6 | CTXDIR=../../../../ 7 | 8 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz 9 | MUMMER=../mummer/mummer.vcf.gz 10 | TRUTH=../truth/CAV1016.fa 11 | MAPPING_TEST=$CTXDIR/scripts/analysis/mapping-vars-test.sh 12 | MUMMER_ISEC=$CTXDIR/scripts/analysis/mummer-vcf-isec.sh 13 | 14 | vcf=cortex.k31.k61.vcf.gz 15 | name=cortex.k31.k61 16 | 17 | mkdir -p mapping_truth mummer_isec 18 | 19 | echo "Mapping test..." 20 | $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name 21 | echo "Mummer intersection..." 22 | $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log 23 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/bad.edges.csv: -------------------------------------------------------------------------------- 1 | # The number of sequencing errors that would add a new edge between two 2 | # existing kmers. 
#ifndef INFER_EDGES_H_
#define INFER_EDGES_H_

#include "cortex_types.h"
#include "db_graph.h"

// Infer missing edges for a single kmer (`node_bkey`), updating `edges`.
// `pop_edges` if true, only add edges that are in at least one other colour
//   -> If two kmers are in a sample and the population has an edge between
//      them, add edge to sample.
// NOTE(review): `edges` and `covgs` presumably point at this kmer's
// per-colour arrays -- confirm against infer_edges.c.
// Return true if changed; false otherwise
bool infer_kmer_edges(const BinaryKmer node_bkey, bool pop_edges,
                      Edges *edges, const Covg *covgs,
                      const dBGraph *db_graph);

// Infer edges across the whole graph using `nthreads` threads.
// NOTE(review): `add_all_edges` looks like the inverse of `pop_edges` above,
// and the return value is presumably the number of kmers modified -- confirm
// both against infer_edges.c.
size_t infer_edges(size_t nthreads, bool add_all_edges, const dBGraph *db_graph);

#endif /* INFER_EDGES_H_ */
Note: there are 3*reflen possible mutations 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges 4 | 21,1000000,955450,959133,3627,100,0.005,604 5 | 31,1000000,980343,981744,838,100,0.005,139 6 | 41,1000000,990014,990588,183,100,0.005,30 7 | 51,1000000,993938,994179,44,100,0.005,7 8 | 61,1000000,995750,995879,15,100,0.005,2 9 | 71,1000000,996793,996870,7,100,0.005,1 10 | 81,1000000,997395,997442,2,100,0.005,0 11 | 91,1000000,997789,997822,5,100,0.005,0 12 | 99,1000000,998029,998055,4,100,0.005,0 13 | -------------------------------------------------------------------------------- /libs/cJSON/tests/test1: -------------------------------------------------------------------------------- 1 | { 2 | "glossary": { 3 | "title": "example glossary", 4 | "GlossDiv": { 5 | "title": "S", 6 | "GlossList": { 7 | "GlossEntry": { 8 | "ID": "SGML", 9 | "SortAs": "SGML", 10 | "GlossTerm": "Standard Generalized Markup Language", 11 | "Acronym": "SGML", 12 | "Abbrev": "ISO 8879:1986", 13 | "GlossDef": { 14 | "para": "A meta-markup language, used to create markup languages such as DocBook.", 15 | "GlossSeeAlso": ["GML", "XML"] 16 | }, 17 | "GlossSee": "markup" 18 | } 19 | } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /tests/vcfcov/calls5/calls.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##fileDate=20151014 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##FORMAT= 7 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 8 | ref 7 . G T . PASS . . 9 | ref 15 . GCAATACCCCCCGATGACGG G . PASS . . 10 | ref 20 . A T . PASS . . 11 | ref 22 . C G . PASS . . 12 | ref 40 . T A . PASS . . 13 | ref 49 . GG CGAT . PASS . . 14 | ref 71 . A GAT . PASS . . 15 | ref 92 . GCACCAGGG AC . PASS . . 16 | ref 121 . C G . PASS . . 17 | ref 142 . GGCACAGCA TTACTCTTC . PASS . . 18 | ref 171 . A CTAG . PASS . . 19 | ref 192 . CATCATAG A . PASS . . 
#!/usr/bin/env perl

use strict;
use warnings;

#
# Sample three random numbers, return how often
# c >= min(a,b) and c <= max(a,b)
#
# Prints "<num_within> / <num_loops> (<percent>%)" to stdout.
#

use List::Util qw(min max);

# Human genome
my $genome_size = 3100000000;
my $num_loops = 100000; # 100K iterations (comment previously said 100M)

my $num_within = 0;

for(my $i = 0; $i < $num_loops; $i++) {
  # Draw a, b, c independently and uniformly from [0, genome_size)
  my ($a,$b,$c) = map {int(rand($genome_size))} 0..2;
  if($c >= min($a,$b) && $c <= max($a,$b)) {
    $num_within++;
  }
}


my $percent = 100 * $num_within / $num_loops;
print "$num_within / $num_loops (".sprintf("%.2f", $percent)."%)\n";
if zero 10 | * @param append_orig_seq If true print out '>name orig=ORIGSEQ' 11 | */ 12 | void correct_reads(CorrectAlnInput *inputs, size_t num_inputs, 13 | const char *dump_seqgap_hist_path, 14 | const char *dump_fraglen_hist_path, 15 | char fq_zero, bool append_orig_seq, 16 | size_t num_threads, const dBGraph *db_graph); 17 | 18 | #endif /* CORRECT_READS_H_ */ 19 | -------------------------------------------------------------------------------- /tests/vcfcov/calls1/jane.fa: -------------------------------------------------------------------------------- 1 | >jane 1A>G 50G>A 199G>C 2 | gCTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGaCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAcG 3 | >jane 1A>C 50G>C 199G>A 4 | cCTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGcCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAaG 5 | >jane chr1 6 | TGGGCCAGTACGGTGAATCCCTGATGATCCGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG 7 | >jane chr1 30C>T 8 | TGGGCCAGTACGGTGAATCCCTGATGATCtGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG 9 | -------------------------------------------------------------------------------- /libs/cJSON/tests/test3: -------------------------------------------------------------------------------- 1 | {"widget": { 2 | "debug": "on", 3 | "window": { 4 | "title": "Sample Konfabulator Widget", 5 | "name": "main_window", 6 | "width": 500, 7 | "height": 500 8 | }, 9 | "image": { 10 | "src": "Images/Sun.png", 11 | "name": "sun1", 12 | "hOffset": 250, 13 | "vOffset": 250, 14 | "alignment": "center" 15 | }, 16 | "text": { 17 | "data": "Click Here", 18 | "size": 36, 19 | "style": "bold", 20 | "name": "text1", 21 | "hOffset": 250, 22 | "vOffset": 100, 23 | "alignment": "center", 24 | "onMouseUp": "sun1.opacity = (sun1.opacity / 
100) * 90;" 25 | } 26 | }} -------------------------------------------------------------------------------- /scripts/report/make-kmer-plot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eou pipefail 3 | 4 | if [[ $# -ne 3 ]]; then 5 | echo "usage: $0 " 1>&2 6 | exit -1 7 | fi 8 | 9 | #in: data/sample.kK.se.links.csv 10 | #out: plots/sample.kK.se.links.pdf 11 | script=$1 12 | in=$2 13 | out=$3 14 | 15 | ROOT=`echo $in | awk '{gsub(/\.(raw|clean).cov.csv$/,"")}1'` 16 | 17 | CUTOFF=`([[ -e $ROOT.kthresh ]] && cat $ROOT.kthresh) || echo 0` 18 | KCOV=`([[ -e $ROOT.kmercov ]] && cat $ROOT.kmercov) || echo 0` 19 | 20 | echo in=$in 21 | echo out=$out 22 | echo cutoff_file=$ROOT.kthresh 23 | echo kcov_file=$ROOT.kmercov 24 | echo CUTOFF=$CUTOFF 25 | echo KCOV=$KCOV 26 | 27 | set -o xtrace 28 | $script $in $out $CUTOFF $KCOV 29 | -------------------------------------------------------------------------------- /src/basic/str_parsing.c: -------------------------------------------------------------------------------- 1 | #include "global.h" 2 | #include "str_parsing.h" 3 | 4 | // Parse a comma separated list e.g. "12,3,12" 5 | // Returns <0 on error, otherwise number of chars used 6 | int comma_list_to_array(const char *str, SizeBuffer *nums) 7 | { 8 | size_t num = 0; 9 | const char *ptr = str; 10 | char *end = NULL; 11 | 12 | // If no numbers success 13 | if(*ptr < '0' && *ptr > '9') return 0; 14 | 15 | while(1) { 16 | num = strtoul(ptr, &end, 10); 17 | size_buf_add(nums, num); 18 | if(!end) die("Cannot parse: '%s'", str); 19 | if(*end != ',') break; 20 | ptr = end+1; 21 | if(*ptr < '0' || *ptr > '9') return -1; // no number after comma! 
22 | } 23 | 24 | return end-str; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /scripts/perl/mccortex-header.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | # Use current directory to find modules 7 | use FindBin; 8 | use lib $FindBin::Bin; 9 | 10 | use McCortexScripts; # load_json_hdr() 11 | 12 | sub print_usage 13 | { 14 | for my $err (@_) { print STDERR "Error: $err\n"; } 15 | print STDERR "" . 16 | "Usage $0 17 | 18 | Read only the JSON header from a file. 19 | 20 | "; 21 | 22 | exit(-1); 23 | } 24 | 25 | if(@ARGV != 1) { print_usage(); } 26 | my $path = shift(@ARGV); 27 | 28 | use IO::Zlib; 29 | my $gz = new IO::Zlib; 30 | $gz->open($path, "rb") or die("Cannot open file: $path"); 31 | 32 | my $hdr = load_json_hdr($gz, $path); 33 | 34 | print $hdr; 35 | 36 | $gz->close(); 37 | -------------------------------------------------------------------------------- /results/hash_table_benchmark/results20150409thurs.linux.txt: -------------------------------------------------------------------------------- 1 | Reading: results20150409thurs.linux.csv 2 | Rows: 70 3 | 1 [1:5] mean: 42.402 stddev: 0.1351203 4 | 2 [6:10] mean: 491.7282 stddev: 0.6633338 5 | 3 [11:15] mean: 74.853 stddev: 4.378826 6 | 4 [16:20] mean: 245.0712 stddev: 83.90267 7 | 5 [21:25] mean: 73.7224 stddev: 16.67115 8 | 6 [26:30] mean: 327.1884 stddev: 126.755 9 | 7 [31:35] mean: 14.6294 stddev: 6.844972 10 | 8 [36:40] mean: 136.237 stddev: 45.24435 11 | 9 [41:45] mean: 19.4692 stddev: 2.524081 12 | 10 [46:50] mean: 216.0472 stddev: 16.80419 13 | 11 [51:55] mean: 41.7132 stddev: 3.203693 14 | 12 [56:60] mean: 325.4446 stddev: 78.24026 15 | 13 [61:65] mean: 50.8198 stddev: 11.77143 16 | 14 [66:70] mean: 404.2498 stddev: 113.7489 17 | -------------------------------------------------------------------------------- /tests/subgraph_unitigs/Makefile: 
#ifndef GRAPH_FORMAT_H_
#define GRAPH_FORMAT_H_

// graph file format version
#define CTX_GRAPH_FILEFORMAT 6

#include "graph_info.h"

// Graph (.ctx)
// In-memory form of a graph file header.
typedef struct
{
  uint32_t version, kmer_size, num_of_bitfields, num_of_cols;
  GraphInfo *ginfo; // Cleaning info etc for each colour
  size_t capacity; // NOTE(review): presumably number of `ginfo` entries allocated -- confirm
} GraphFileHeader;

// NOTE(review): presumably ensures `header` has room for `num_of_cols`
// colours -- confirm against graph_format.c
void graph_header_capacity(GraphFileHeader *header, size_t num_of_cols);
// Release memory owned by `header`; does not free `header` itself
// (graph_header_free() below does that separately)
void graph_header_dealloc(GraphFileHeader *header);
void graph_header_print(const GraphFileHeader *header);

// Release the header's contents and then the header struct itself.
// `hdr` must therefore be memory that is valid to pass to ctx_free().
static inline void graph_header_free(GraphFileHeader *hdr) {
  graph_header_dealloc(hdr);
  ctx_free(hdr);
}

#endif /* GRAPH_FORMAT_H_ */
% TODO Description 13 | 14 | \begin{figure}[ht] 15 | \centering 16 | \includegraphics[width=5in]{confidence_plot.pdf} 17 | \caption{Confidence values for various read lengths and coverages} 18 | \label{fig:contig_conf_values_plot} 19 | \end{figure} 20 | 21 | Shown in Figure \ref{fig:contig_conf_values_plot}. 22 | 23 | \begin{gather*} 24 | \lambda = Coverage / ReadLength \\ 25 | R_k = ReadLength - JunctionDistance + 1 \\ 26 | confidence = \left(1 - e^{-\lambda R_k}\right) e^{-\lambda e^{-\lambda R_k}} 27 | \end{gather*} 28 | 29 | 30 | \end{document} 31 | -------------------------------------------------------------------------------- /tests/vcfcov/calls1/calls.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##fileDate=20151014 3 | ##FORMAT= 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##contig= 7 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 8 | ref 1 . A T . PASS . GT 9 | ref 1 . A T . PASS . GT 10 | ref 1 . A G . PASS . GT 11 | ref 1 . A G . PASS . GT 12 | ref 1 . A C . PASS . GT 13 | ref 1 . A C,T . PASS . GT 14 | ref 50 . G T . PASS . GT 15 | ref 50 . G T . PASS . GT 16 | ref 50 . G A . PASS . GT 17 | ref 50 . G A . PASS . GT 18 | ref 50 . G C . PASS . GT 19 | ref 199 . G T . PASS . GT 20 | ref 199 . G T . PASS . GT 21 | ref 199 . G C . PASS . GT 22 | ref 199 . G C . PASS . GT 23 | ref 199 . G A . PASS . GT 24 | chr1 30 . C T . PASS . GT 25 | chr1 30 . C T . PASS . 
GT 26 | -------------------------------------------------------------------------------- /src/graph/graph_search.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_SEARCH_H_ 2 | #define GRAPH_SEARCH_H_ 3 | 4 | #include "cortex_types.h" 5 | #include "binary_kmer.h" 6 | #include "graph_file_reader.h" 7 | 8 | // 9 | // Search a sorted graph file on disk 10 | // 11 | 12 | typedef struct GraphFileSearch GraphFileSearch; 13 | 14 | GraphFileSearch *graph_search_new(GraphFileReader *file); 15 | void graph_search_destroy(GraphFileSearch *gs); 16 | 17 | bool graph_search_find(GraphFileSearch *gs, BinaryKmer bkey, 18 | Covg *covgs, Edges *edges); 19 | 20 | void graph_search_fetch(GraphFileSearch *gs, size_t idx, 21 | BinaryKmer *bkey, Covg *covgs, Edges *edges); 22 | 23 | void graph_search_rand(GraphFileSearch *gs, 24 | BinaryKmer *bkey, Covg *covgs, Edges *edges); 25 | 26 | #endif /* GRAPH_SEARCH_H_ */ 27 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/notes.txt: -------------------------------------------------------------------------------- 1 | 2 | Requires python and uses the bundled version of bfc. SGA assembly can also be 3 | generated if you have it installed (e.g. with `brew install science/sga`). 
Specify the absolute path to the reference FASTA:

    REF=../data/chr22/chr22_28M_29M.fa
    # OR
    REF=../data/chr22/chr22_17M_18M.fa
    # Then convert to absolute path
    REF=$(cd "$(dirname "$REF")"; pwd)/$(basename "$REF")

Sample reads and assemble with:

    make REF=$REF

Run SGA assembly:

    ./sga-all-kmers.sh stocherr_cov/sga data/stocherr_cov.1.fq.gz data/stocherr_cov.2.fq.gz $REF
    ./sga-all-kmers.sh stocherr_corr/sga data/stocherr_corr.1.fq.gz data/stocherr_corr.2.fq.gz $REF

Generate plots and tables with:

    cd results
    ./generate-results.sh
Handle missing contig= header. 7 | # call2: SNP every base, across two chromosomes 8 | # call3: blocks of overlapping variants (on one chrom) 9 | # call4: variants exactly k-1 bases apart (on one chrom) 10 | # call5: test for large indels 11 | 12 | all: 13 | cd calls0 && $(MAKE) 14 | cd calls1 && $(MAKE) 15 | cd calls2 && $(MAKE) 16 | cd calls3 && $(MAKE) 17 | cd calls4 && $(MAKE) 18 | cd calls5 && $(MAKE) 19 | @echo "vcfcov: All looks good." 20 | 21 | clean: 22 | cd calls0 && $(MAKE) clean 23 | cd calls1 && $(MAKE) clean 24 | cd calls2 && $(MAKE) clean 25 | cd calls3 && $(MAKE) clean 26 | cd calls4 && $(MAKE) clean 27 | cd calls5 && $(MAKE) clean 28 | 29 | .PHONY: all clean view 30 | -------------------------------------------------------------------------------- /scripts/bash/links-median-threshold.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | 5 | if [ $# -ne 3 ]; then 6 | >&2 echo "usage: $0 " && 7 | >&2 echo " Pick threshold for cleaning links" && false 8 | fi 9 | 10 | fdr_limit=$1 11 | k=$2 12 | tree_csv=$3 13 | 14 | maxk=$[ ( ($k + 31) / 32 ) * 32 - 1 ] 15 | DIR=$( cd $( dirname ${BASH_SOURCE[0]} ) && pwd ) 16 | CTX="$DIR/../../bin/mccortex $k" 17 | 18 | thresh=$($CTX linkthresh -q --zero $fdr_limit $[$k+2] $tree_csv; 19 | $CTX linkthresh -q --zero $fdr_limit $[$k+3] $tree_csv; 20 | $CTX linkthresh -q --zero $fdr_limit $[$k+4] $tree_csv; 21 | $CTX linkthresh -q --zero $fdr_limit $[$k+5] $tree_csv; 22 | $CTX linkthresh -q --zero $fdr_limit $[$k+6] $tree_csv;) 23 | 24 | # Print all 5 values 25 | echo $thresh; 26 | # Print median 27 | echo $thresh | tr " " "\n" | sort -n | head -3 | tail -1 28 | -------------------------------------------------------------------------------- /scripts/build/mccortex: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Exit immediately if a command exits with a non-zero status. 
4 | set -euo pipefail 5 | 6 | function usage { 7 | echo "usage: mccortex [ ...]" >&2 8 | echo " Wrapper to find the correct mccortex binary given kmer size (K)" >&2 9 | exit -1 10 | } 11 | 12 | if [[ $# -lt 1 || !( $1 =~ ^[0-9]+$ ) ]] 13 | then 14 | usage 15 | fi 16 | 17 | K=$1 18 | shift 19 | 20 | if [[ $[ $K & 1 ] -eq 0 || $K -lt 3 ]] 21 | then 22 | echo "kmer is not odd and greater than 2: $K" >&2 23 | exit -1 24 | fi 25 | 26 | MAXK=$[ (($K+31)/32)*32 - 1 ] 27 | PARENTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )" 28 | CMD=$PARENTDIR/bin/mccortex$MAXK 29 | 30 | if ! [[ -e $CMD ]] 31 | then 32 | echo "Error: $CMD not found" >&2 33 | echo "Please compile mccortex with: 'make MAXK=$MAXK'" >&2 34 | exit -2 35 | fi 36 | 37 | # Run 38 | $CMD "$@" 39 | -------------------------------------------------------------------------------- /tests/largeK/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | # Run build and clean commands followed by the check command 4 | # to test we can build valid graphs for random sequence 5 | # Works for any K value (e.g. K=11, K=39, K=61) 6 | 7 | K=39 8 | CTXDIR=../.. 
9 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1]) 10 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 11 | 12 | GRAPHS=rnd.k$(K).ctx rnd.clean.k$(K).ctx 13 | LOGS=$(addsuffix .log,$(GRAPHS)) 14 | TGTS=rnd.fa $(GRAPHS) 15 | 16 | all: $(TGTS) 17 | 18 | clean: 19 | rm -rf $(TGTS) $(LOGS) 20 | 21 | rnd.fa: 22 | $(DNACAT) -F -n 200 > $@ 23 | 24 | rnd.k$(K).ctx: rnd.fa 25 | $(MCCORTEX) build -k $(K) --sample rnd --seq rnd.fa $@ >& $@.log 26 | $(MCCORTEX) check -q $@ 27 | 28 | rnd.clean.k$(K).ctx: rnd.k$(K).ctx 29 | $(MCCORTEX) clean -o $@ --unitigs=2 $< >& $@.log 30 | $(MCCORTEX) check -q $@ 31 | 32 | .PHONY: all clean 33 | -------------------------------------------------------------------------------- /tests/vcfcov/calls1/john.fa: -------------------------------------------------------------------------------- 1 | >john 2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 3 | >john 4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 5 | >john 1A>T 50G>T 199G>T 6 | tCTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGtCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAtG 7 | >john chr1 30C>T 8 | TGGGCCAGTACGGTGAATCCCTGATGATCtGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG 9 | >john chr1 30C>T 10 | TGGGCCAGTACGGTGAATCCCTGATGATCtGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG 11 | -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/mcrun/analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 
set -eou pipefail 4 | set -o xtrace 5 | 6 | CTXDIR=../../../../ 7 | 8 | function myreadlink() { 9 | ( cd $(dirname $1); echo $PWD/$(basename $1); ) 10 | } 11 | 12 | REF=$(myreadlink ../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz) 13 | MUMMER=$(myreadlink ../mummer/mummer.vcf.gz) 14 | TRUTH=$(myreadlink ../truth/CAV1016.fa) 15 | BCFTOOLS=$(myreadlink $CTXDIR/libs/bcftools/bcftools) 16 | MAPPING_TEST=$(myreadlink $CTXDIR/scripts/analysis/mapping-vars-test.sh) 17 | MUMMER_ISEC=$(myreadlink $CTXDIR/scripts/analysis/mummer-vcf-isec.sh) 18 | 19 | mkdir -p mapping_truth mummer_isec 20 | 21 | for vcf in `ls vcfs/*k{61,51}.vcf.gz`; do 22 | name=`basename $vcf .vcf.gz` 23 | echo "== $name" 24 | [ -e $vcf.csi ] || $BCFTOOLS index $vcf 25 | $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name 26 | $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log 27 | done 28 | 29 | -------------------------------------------------------------------------------- /src/global/cortex_types.h: -------------------------------------------------------------------------------- 1 | #ifndef CORTEX_TYPES_H_ 2 | #define CORTEX_TYPES_H_ 3 | 4 | typedef size_t Colour; 5 | typedef uint8_t Edges; 6 | typedef uint32_t Covg; 7 | 8 | #define COVG_MAX UINT_MAX 9 | 10 | #define SAFE_ADD_COVG(a,b) ((uint64_t)(a)+(b) > COVG_MAX ? 
COVG_MAX : (a)+(b)) 11 | #define SAFE_SUM_COVG(a,b) ((a) = SAFE_ADD_COVG((a), (b))) 12 | 13 | typedef uint8_t Orientation; 14 | #define FORWARD 0 15 | #define REVERSE 1 16 | 17 | typedef uint8_t ReadMateDir; 18 | #define READPAIR_FF 0 19 | #define READPAIR_FR 1 20 | #define READPAIR_RF 2 21 | #define READPAIR_RR 3 22 | // See seq_reader.h to get string representations (MP_DIR_STRS[dir]) 23 | 24 | #define read_mate_r1(r) ((r)&2) 25 | #define read_mate_r2(r) ((r)&1) 26 | 27 | // don't ever use the top bit of hkey, used later for orientation 28 | typedef uint64_t hkey_t; 29 | 30 | typedef struct { 31 | hkey_t orient:1, key:63; 32 | } dBNode; 33 | 34 | #endif /* CORTEX_TYPES_H_ */ 35 | -------------------------------------------------------------------------------- /results/file_buffering/file-buffering.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | function usage { 6 | echo "usage: $0 " >&2 7 | echo " Compare buffered vs unbuffered read times" >&2 8 | exit -1 9 | } 10 | 11 | if [ $# -ne 1 ]; then usage; fi 12 | 13 | SEQTEST=../../libs/seq_file/benchmarks/seqtest 14 | 15 | # Load into disk cache 16 | $SEQTEST --no-zlib --no-buf $1 >& /dev/null 17 | 18 | ( 19 | time $SEQTEST --no-zlib --no-buf $1; 20 | time $SEQTEST --no-zlib --no-buf $1; 21 | time $SEQTEST --no-zlib --no-buf $1; 22 | time $SEQTEST --no-zlib --no-buf $1; 23 | time $SEQTEST --no-zlib --no-buf $1; 24 | 25 | time $SEQTEST --no-zlib $1; 26 | time $SEQTEST --no-zlib $1; 27 | time $SEQTEST --no-zlib $1; 28 | time $SEQTEST --no-zlib $1; 29 | time $SEQTEST --no-zlib $1; 30 | ) 2>&1 | \ 31 | grep '^user' | \ 32 | sed -E 's/.*([0-9]+)m([0-9\.]+)s.*/\1 \2/g' | \ 33 | awk '{print $1*60+$2}' 34 | -------------------------------------------------------------------------------- /scripts/bash/mccortex-to-ray.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Exit 
immediately if a command exits with a non-zero status. 4 | set -euo pipefail 5 | 6 | cmd=$0 7 | if [[ $# -ne 1 && $# -ne 2 ]]; 8 | then 9 | echo "usage $cmd " 10 | exit -1; 11 | fi 12 | 13 | DIR=$( cd $( dirname ${BASH_SOURCE[0]} ) && cd .. && pwd ) 14 | CTX="$DIR/bin/mccortex31 view" 15 | 16 | col=0 17 | if [[ $# -eq 2 ]] 18 | then 19 | col=$2 20 | 21 | # Check number of colours in binary 22 | bincols=`$CTX --info $1 | grep 'colours:' | grep -o '[0-9]*$'` || exit 23 | if [[ $col -ge $bincols ]] 24 | then 25 | echo "Binary only has $bincols colours (you requested $col)" 26 | exit -1 27 | fi 28 | fi 29 | 30 | $CTX --print_kmers $1 | awk 'BEGIN { col='$col' } { 31 | covg=$(2+col); 32 | edges=$(2+col+(NF-1)/2); 33 | x=substr(edges,0,4); y=substr(edges,5,8); 34 | gsub("\\.","",x); gsub("\\.","",y); 35 | print $1";"covg";"toupper(x)";"y 36 | }' 37 | -------------------------------------------------------------------------------- /tests/vcfcov/calls0/truth.cov.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##fileDate=20151014 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##FORMAT= 7 | ##INFO= 8 | ##INFO= 9 | ##FORMAT= 10 | ##FORMAT= 11 | ##FORMAT= 12 | ##FORMAT= 13 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT MasterGrunt John Jane 14 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/cleaning.corr.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs 6 | 
21,943524,1041105,943524,943614,943524,0.09373,0.00010,1.00000 7 | 31,973199,1184735,973199,973203,973199,0.17855,0.00000,1.00000 8 | 41,985939,1265236,985939,985939,985939,0.22075,0.00000,1.00000 9 | 51,991526,1292210,991526,991526,991526,0.23269,0.00000,1.00000 10 | 61,994205,1284937,994205,994205,994205,0.22626,0.00000,1.00000 11 | 71,995708,1249759,995708,995708,995708,0.20328,0.00000,1.00000 12 | 81,996639,1188147,996639,996638,996638,0.16118,0.00000,0.99999 13 | 91,997273,1102712,997215,994901,994896,0.09567,0.00001,0.97849 14 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170206mon_chr22_28M/cleaning.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs 6 | 21,943524,8281513,943524,944420,943506,0.88607,0.00097,1.00000 7 | 31,973199,10505480,973199,973255,973151,0.90736,0.00011,0.99999 8 | 41,985939,11685959,985939,985901,985901,0.91563,0.00000,1.00000 9 | 51,991526,11925083,991526,991494,991494,0.91685,0.00000,1.00000 10 | 61,994205,11290786,994205,994174,994174,0.91195,0.00000,1.00000 11 | 71,995708,9832665,995706,995658,995658,0.89873,0.00000,0.99999 12 | 81,996639,7598955,996636,995784,995784,0.86885,0.00000,0.99987 13 | 91,997273,4637375,995584,409278,409248,0.78531,0.00007,0.86132 14 | -------------------------------------------------------------------------------- /scripts/bash/vcf-longest-haplotype.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | cmd=$0 6 | if [[ $# -ne 1 ]]; then 7 | 
echo "usage: $cmd " 8 | exit -1 9 | fi 10 | 11 | if ! [[ -e $1 ]]; then 12 | echo "Cannot read $1" 13 | exit -1 14 | fi 15 | 16 | STATS=`grep -v '^#' $1 | awk '{print $5}' | tr ',' ' 17 | ' | awk '{print length}' | sort -n | awk 'BEGIN{max=0; sum=0;} 18 | { values[NR]=$1; sum += $1; if ( $1 > max ) { max = $1; } } 19 | END{ 20 | median = (NR % 2) ? values[(NR + 1) / 2] \ 21 | : (values[(NR / 2)] + values[(NR / 2) + 1]) / 2.0; 22 | print sum" "NR" "max" "sprintf("%.1f", sum/NR)" "median 23 | }'` 24 | 25 | sum=`echo $STATS | cut -d' ' -f1` 26 | num=`echo $STATS | cut -d' ' -f2` 27 | max=`echo $STATS | cut -d' ' -f3` 28 | mean=`echo $STATS | cut -d' ' -f4` 29 | median=`echo $STATS | cut -d' ' -f5` 30 | 31 | echo "[Haplotype length (bp)] longest: $max; mean: $mean; median: $median \ 32 | [$num alleles; $sum bp total]" 33 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/cleaning.corr.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs 6 | 21,943524,1041105,943524,943614,943524,0.09373,0.00010,1.00000 7 | 31,973199,1184735,973199,973203,973199,0.17855,0.00000,1.00000 8 | 41,985939,1265236,985939,985939,985939,0.22075,0.00000,1.00000 9 | 51,991526,1292210,991526,991526,991526,0.23269,0.00000,1.00000 10 | 61,994205,1284937,994205,994205,994205,0.22626,0.00000,1.00000 11 | 71,995708,1249759,995708,995708,995708,0.20328,0.00000,1.00000 12 | 81,996639,1188147,996639,996638,996638,0.16118,0.00000,0.99999 13 | 
91,997273,1102712,997215,994901,994896,0.09567,0.00001,0.97849 14 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/cleaning.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs 6 | 21,943524,8281513,943524,944420,943506,0.88607,0.00097,1.00000 7 | 31,973199,10505480,973199,973255,973151,0.90736,0.00011,0.99999 8 | 41,985939,11685959,985939,985901,985901,0.91563,0.00000,1.00000 9 | 51,991526,11925083,991526,991494,991494,0.91685,0.00000,1.00000 10 | 61,994205,11290786,994205,994174,994174,0.91195,0.00000,1.00000 11 | 71,995708,9832665,995706,995658,995658,0.89873,0.00000,0.99999 12 | 81,996639,7598955,996636,995784,995784,0.86885,0.00000,0.99987 13 | 91,997273,4637375,995584,409278,409248,0.78531,0.00007,0.86132 14 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/cleaning.corr.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs 6 | 21,943524,1041105,943524,943614,943524,0.09373,0.00010,1.00000 7 | 
31,973199,1184735,973199,973203,973199,0.17855,0.00000,1.00000 8 | 41,985939,1265236,985939,985939,985939,0.22075,0.00000,1.00000 9 | 51,991526,1292210,991526,991526,991526,0.23269,0.00000,1.00000 10 | 61,994205,1284937,994205,994205,994205,0.22626,0.00000,1.00000 11 | 71,995708,1249759,995708,995708,995708,0.20328,0.00000,1.00000 12 | 81,996639,1188147,996639,996638,996638,0.16118,0.00000,0.99999 13 | 91,997273,1102712,997215,994901,994896,0.09567,0.00001,0.97849 14 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/cleaning.table.csv: -------------------------------------------------------------------------------- 1 | # Number of kmers in the perfect, raw and cleaned graphs 2 | # _nreal is the number of real kmers in the raw/cleaned graph 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs 6 | 21,943524,8281513,943524,944420,943506,0.88607,0.00097,1.00000 7 | 31,973199,10505480,973199,973255,973151,0.90736,0.00011,0.99999 8 | 41,985939,11685959,985939,985901,985901,0.91563,0.00000,1.00000 9 | 51,991526,11925083,991526,991494,991494,0.91685,0.00000,1.00000 10 | 61,994205,11290786,994205,994174,994174,0.91195,0.00000,1.00000 11 | 71,995708,9832665,995706,995658,995658,0.89873,0.00000,0.99999 12 | 81,996639,7598955,996636,995784,995784,0.86885,0.00000,0.99987 13 | 91,997273,4637375,995584,409278,409248,0.78531,0.00007,0.86132 14 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/count-links.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | my $i = 0; 7 | my ($kmersize,$total_kmers) = (undef,undef); 8 | 
9 | print "".join("\t", ('K', 'n_graph_kmers', 'n_link_kmers', 'n_links', 'link_junction_mem'))."\n"; 10 | 11 | while(my $line = <>) { 12 | chomp($line); 13 | if($line =~ /\[graph\] kmer-size: ([0-9]+)/i) { 14 | $kmersize = $1; 15 | } 16 | elsif($line =~ /\[GReader\] ([0-9,]+) kmers, .* filesize/i) { 17 | $total_kmers = $1; 18 | $total_kmers =~ s/,//g; 19 | } 20 | elsif($line =~ /kmers-with-paths: ([0-9,]+), num paths: ([0-9,]+), path-bytes: (.*)/gi) { 21 | my ($nkmers,$nlinks,$linkmem) = ($1,$2,$3); 22 | $nkmers =~ s/,//g; 23 | $nlinks =~ s/,//g; 24 | $linkmem =~ s/,//g; 25 | print "$kmersize\t$total_kmers\t$nkmers\t$nlinks\t$linkmem\n"; 26 | ($kmersize,$total_kmers) = (undef,undef) 27 | } 28 | $i++ 29 | } 30 | 31 | print STDERR "[$0] read $i lines\n"; 32 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/sga-all-kmers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | set -x 5 | 6 | if [ $# != 4 ] 7 | then 8 | ( >&2 echo "usage: $0 " ) 9 | exit -1 10 | fi 11 | 12 | function abspath { 13 | echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")" 14 | } 15 | 16 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 17 | CTXDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd ../.. && pwd )" 18 | 19 | DNACAT=${CTXDIR}/libs/seq_file/bin/dnacat 20 | PY_BREAK_VS_TRUTH=${CTXDIR}/scripts/python/break-contigs-vs-truth.py 21 | 22 | outdir="$1" 23 | IN1=`abspath "$2"` 24 | IN2=`abspath "$3"` 25 | REF=`abspath "$4"` 26 | 27 | KMERS="21 31 41 51 61 71 81 91" 28 | 29 | mkdir -p $outdir 30 | cd $outdir 31 | 32 | $DIR/sga.sh $IN1 $IN2 "$KMERS" 33 | 34 | for k in `echo "$KMERS"` 35 | do 36 | $DNACAT -P $REF | $PY_BREAK_VS_TRUTH 21 k$k/assemble.m${k}-contigs.fa > k$k/stats.k${k}.out 2> k$k/stats.k${k}.txt 37 | done 38 | 39 | cd .. 
40 | -------------------------------------------------------------------------------- /src/graph/prune_nodes.h: -------------------------------------------------------------------------------- 1 | #ifndef PRUNE_NODES_H_ 2 | #define PRUNE_NODES_H_ 3 | 4 | // 5 | // Pruning nodes from the graph 6 | // 7 | #include "cortex_types.h" 8 | #include "db_node.h" 9 | 10 | // Remove a node from the graph, do not edit any edges / adjacent nodes 11 | // Threadsafe 12 | void prune_node_without_edges_mt(dBGraph *db_graph, hkey_t hkey); 13 | 14 | void prune_node(dBGraph *db_graph, hkey_t node); 15 | 16 | // Unitig pruning used by ctx_clean 17 | void prune_unitig(dBNode *nodes, size_t len, dBGraph *db_graph); 18 | 19 | // Used by ctx_subgraph.c, clean_graph.c 20 | // flags is a bit array, one bit per kmer 21 | void prune_nodes_lacking_flag(size_t num_threads, const uint8_t *flags, 22 | dBGraph *db_graph); 23 | 24 | // Currently unused 25 | // remove nodes if not in any colour 26 | // i.e. db_node_has_col(graph,node,colour) == false for all colours 27 | void prune_uncoloured_nodes(dBGraph *db_graph); 28 | 29 | #endif /* PRUNE_NODES_H_ */ 30 | -------------------------------------------------------------------------------- /libs/cJSON/tests/test5: -------------------------------------------------------------------------------- 1 | {"menu": { 2 | "header": "SVG Viewer", 3 | "items": [ 4 | {"id": "Open"}, 5 | {"id": "OpenNew", "label": "Open New"}, 6 | null, 7 | {"id": "ZoomIn", "label": "Zoom In"}, 8 | {"id": "ZoomOut", "label": "Zoom Out"}, 9 | {"id": "OriginalView", "label": "Original View"}, 10 | null, 11 | {"id": "Quality"}, 12 | {"id": "Pause"}, 13 | {"id": "Mute"}, 14 | null, 15 | {"id": "Find", "label": "Find..."}, 16 | {"id": "FindAgain", "label": "Find Again"}, 17 | {"id": "Copy"}, 18 | {"id": "CopyAgain", "label": "Copy Again"}, 19 | {"id": "CopySVG", "label": "Copy SVG"}, 20 | {"id": "ViewSVG", "label": "View SVG"}, 21 | {"id": "ViewSource", "label": "View Source"}, 22 | 
{"id": "SaveAs", "label": "Save As"}, 23 | null, 24 | {"id": "Help"}, 25 | {"id": "About", "label": "About Adobe CVG Viewer..."} 26 | ] 27 | }} 28 | -------------------------------------------------------------------------------- /scripts/report/make-link-plot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eou pipefail 3 | 4 | if [[ $# -ne 3 ]]; then 5 | echo "usage: $0 " 1>&2 6 | exit -1 7 | fi 8 | 9 | #in: data/sample.kK.se.links.csv 10 | #out: plots/sample.kK.se.links.pdf 11 | script=$1 12 | in=$2 13 | out=$3 14 | 15 | KMER=`echo "$in" | grep -oE 'k[0-9]+' | grep -oE '[0-9]+'` 16 | CUTOFFFILE=`echo "$in" | awk '{gsub(/\.links\.csv$/,".links.thresh")}1'` 17 | KCOVFILE=`echo "$in" | awk '{gsub(/\.(se|pe)\.links\.csv$/,".kmercov")}1'` 18 | READLENFILE=`echo "$in" | awk '{gsub(/\.(se|pe)\.links\.csv$/,".readlen")}1'` 19 | 20 | CUTOFF=`([[ -e $CUTOFFFILE ]] && cat $CUTOFFFILE) || echo 0` 21 | KCOV=`([[ -e $KCOVFILE ]] && cat $KCOVFILE) || echo 0` 22 | READLEN=`([[ -e $READLENFILE ]] && cat $READLENFILE) || echo 0` 23 | 24 | echo KMER=$KMER 25 | echo CUTOFFFILE=$CUTOFFFILE 26 | echo KCOVFILE=$KCOVFILE 27 | echo READLENFILE=$READLENFILE 28 | 29 | set -o xtrace 30 | $script $in $out $CUTOFF $KMER $KCOV $READLEN 31 | -------------------------------------------------------------------------------- /src/basic/hash_mem.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_MEM_H_ 2 | #define HASH_MEM_H_ 3 | 4 | #define REHASH_LIMIT 20 5 | #define IDEAL_OCCUPANCY 0.75f 6 | #define WARN_OCCUPANCY 0.9f 7 | // bucket size must be <256 8 | #define MAX_BUCKET_SIZE 48 9 | 10 | // Hash table capacity is x*(2^y) where x and y are parameters 11 | // memory is x*(2^y)*sizeof(BinaryKmer) + (2^y) * 2 12 | static inline size_t ht_mem(size_t bktsize, size_t nbkts, size_t nbits) { 13 | return (bktsize * nbkts * nbits)/8 + (nbkts) * sizeof(uint8_t[2]); 14 | } 15 | 16 | // Returns 
capacity of a hash table that holds at least nkmers 17 | size_t hash_table_cap(uint64_t nkmers, uint64_t *num_bkts_ptr, uint8_t *bkt_size_ptr); 18 | 19 | // Returns memory required to hold nkmers 20 | size_t hash_table_mem(uint64_t nkmers, size_t entrybits, uint64_t *nkmers_ptr); 21 | 22 | // Returns memory used for hashtable no more than some memory limit 23 | size_t hash_table_mem_limit(size_t memlimit, size_t entrybits, uint64_t *nkmers_ptr); 24 | 25 | #endif /* HASH_MEM_H_ */ 26 | -------------------------------------------------------------------------------- /tests/vcfcov/calls3/calls.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##fileDate=20151014 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##FORMAT= 7 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 8 | ref 2 . C GCAAGCTTTTCTAATTCGTAT . PASS . . 9 | ref 10 . A C,GCAAGCTTTTCTAATTCGTAT . PASS . . 10 | ref 11 . A GCAAGCTTTTCTAATTCGTAT . PASS . . 11 | ref 17 . A G . PASS . . 12 | ref 31 . A T . PASS . . 13 | ref 32 . C T . PASS . . 14 | ref 33 . G A . PASS . . 15 | ref 35 . C G . PASS . . 16 | ref 54 . A G . PASS . . 17 | ref 56 . G C . PASS . . 18 | ref 80 . C T . PASS . . 19 | ref 85 . G A . PASS . . 20 | ref 90 . C G . PASS . . 21 | ref 91 . A GCAAGCTTTTCTAATTCGTAT . PASS . . 22 | ref 120 . G A . PASS . . 23 | ref 122 . G T . PASS . . 24 | ref 140 . A C . PASS . . 25 | ref 142 . G A . PASS . . 26 | ref 144 . C T . PASS . . 27 | ref 146 . C A . PASS . . 28 | ref 146 . C G . PASS . . 29 | ref 167 . T C . PASS . . 30 | -------------------------------------------------------------------------------- /results/var_calling_10ecoli/about.txt: -------------------------------------------------------------------------------- 1 | 2 | # Cortex_var only compiles and runs cleanly on Linux 3 | # Dependencies: Cortex_var, vcf-hack, stampy, vcftools 4 | 5 | Simulate a diploid with 60X coverage (30X per chrom). 
1% seq error and 6 | 100bp read length, with paired end reads (insert length ~250bp, stddev = 50). 7 | 8 | Diploid is simulated by mutating a ref twice to create two chromosomes. We then 9 | call variants with the bubble caller including the reference. Comparing against 10 | the truth gives us a FDR and sensitivity measurement. 11 | 12 | Run simulation with k=21 and k=31: 13 | 14 | make K=21 run-mccortex run-cortex >& results/20150428.tues.k21.linux.txt 15 | make K=31 run-mccortex run-cortex >& results/20150428.tues.k31.linux.txt 16 | 17 | Re-run again with read lengths of 150bp: 18 | 19 | make K=21 run-mccortex run-cortex >& results/20150429.wed.k21.linux.txt 20 | make K=31 run-mccortex run-cortex >& results/20150429.wed.k31.linux.txt 21 | 22 | Clean with: 23 | 24 | make clean K=21 25 | make clean K=31 26 | -------------------------------------------------------------------------------- /src/tools/pop_bubbles.h: -------------------------------------------------------------------------------- 1 | #ifndef POP_BUBBLES_H_ 2 | #define POP_BUBBLES_H_ 3 | 4 | #include "db_graph.h" 5 | 6 | typedef struct 7 | { 8 | int max_rmv_covg, max_rmv_klen, max_rmv_kdiff; 9 | } PopBubblesPrefs; 10 | 11 | /** 12 | * visited, rmvbits should each have at least db_graph->capacity bits 13 | * and should be initialised to zeros 14 | * rmvbits will have bits set for all nodes that should be removed 15 | * @param max_rmv_covg only remove contigs with mean covg <= max_rmv_covg, 16 | * ignored if <= 0. 17 | * @param max_rmv_klen only remove contigs with num kmers <= max_rmv_klen, 18 | * ignored if <= 0. 19 | * @param max_rmv_kdiff only remove contigs if max diff in kmers <= max_rmv_kdiff, 20 | * ignored if < 0. 
21 | * @return number of bubbles popped 22 | **/ 23 | size_t pop_bubbles(const dBGraph *db_graph, size_t nthreads, 24 | PopBubblesPrefs prefs, 25 | uint8_t *visited, uint8_t *rmvbits); 26 | 27 | #endif /* POP_BUBBLES_H_ */ 28 | -------------------------------------------------------------------------------- /results/var_calling_diploid_chr22_1Mbp/about.txt: -------------------------------------------------------------------------------- 1 | 2 | # Cortex_var only compiles and runs cleanly on Linux 3 | # Dependencies: Cortex_var, vcf-hack, stampy, vcftools 4 | 5 | Simulate a diploid with 60X coverage (30X per chrom). 1% seq error and 6 | 100bp read length, with paired end reads (insert length ~250bp, stddev = 50). 7 | 8 | Diploid is simulated by mutating a ref twice to create two chromosomes. We then 9 | call variants with the bubble caller including the reference. Comparing against 10 | the truth gives us a FDR and sensitivity measurement. 11 | 12 | Run simulation with k=21 and k=31: 13 | 14 | make K=21 run-mccortex run-cortex >& results/20150428.tues.k21.linux.txt 15 | make K=31 run-mccortex run-cortex >& results/20150428.tues.k31.linux.txt 16 | 17 | Re-run again with read lengths of 150bp: 18 | 19 | make K=21 run-mccortex run-cortex >& results/20150429.wed.k21.linux.txt 20 | make K=31 run-mccortex run-cortex >& results/20150429.wed.k31.linux.txt 21 | 22 | Clean with: 23 | 24 | make clean K=21 25 | make clean K=31 26 | -------------------------------------------------------------------------------- /travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Only run if we are not doing Coverity Scan analysis 6 | # The COVERITY_SCAN_BRANCH environment variable is not set until AFTER install 7 | # step has run, so we do a check on which git branch we have 8 | if [ $(git rev-parse --abbrev-ref HEAD) != "coverity_scan" ] 9 | then 10 | 11 | # Compile third party code 12 | cd libs && make && 
cd .. 13 | 14 | # Set up cpanm, install JSON perl package 15 | # Using default ~/perl5 local directory 16 | curl -L https://cpanmin.us | perl - App::cpanminus 17 | ~/perl5/bin/cpanm --force --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) 18 | ~/perl5/bin/cpanm --force --local-lib=~/perl5 JSON 19 | 20 | # Set up installing perl modules library path with: 21 | # eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)" 22 | echo '[ $SHLVL -eq 1 ] && eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"' >> ~/.bashrc 23 | echo '[ $SHLVL -eq 1 ] && eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"' >> ~/.profile 24 | 25 | fi 26 | -------------------------------------------------------------------------------- /scripts/analysis/sam-count-vars.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Takes input from sam-count-bases e.g.: 4 | # NC_009648.1 21 A 66 0 0 66 0 5 | 6 | use strict; 7 | use warnings; 8 | use List::Util qw( reduce ); 9 | 10 | my %dna = ('A'=>0, 'C'=>1, 'G'=>2, 'T'=>3); 11 | my $ncalls = 0; 12 | my $ngood_calls = 0; 13 | my $min_cov_frac = 0.9; 14 | 15 | for($ncalls = 0; defined(my $line = <>); $ncalls++) { 16 | chomp($line); 17 | my @cols = split('\s', $line); 18 | my $ref_base = $cols[2]; 19 | my $tot_cov = $cols[3]; 20 | my %cov = ('A'=>$cols[4+0], 'C'=>$cols[4+1], 'G'=>$cols[4+2], 'T'=>$cols[4+3]); 21 | my $max_base = reduce { $cov{$a} > $cov{$b} ? $a : $b } keys %cov; 22 | if($max_base ne $ref_base && $cov{$max_base} >= $min_cov_frac*$tot_cov) { 23 | $ngood_calls++; 24 | print "".join("\t", "GOOD", @cols, $max_base)."\n"; 25 | } else { 26 | print "".join("\t", "BAD", @cols, $max_base)."\n"; 27 | } 28 | } 29 | 30 | print "$ngood_calls / $ncalls (" . 31 | sprintf("%.2f", $ncalls ? (100*$ngood_calls)/$ncalls : 0) . 
32 | "%)\n"; 33 | -------------------------------------------------------------------------------- /src/graph/graph_format.c: -------------------------------------------------------------------------------- 1 | #include "global.h" 2 | #include "graph_format.h" 3 | 4 | void graph_header_capacity(GraphFileHeader *h, size_t num_of_cols) 5 | { 6 | size_t i; 7 | 8 | if(num_of_cols > h->capacity) { 9 | h->ginfo = ctx_recallocarray(h->ginfo, h->capacity, num_of_cols, sizeof(GraphInfo)); 10 | for(i = h->capacity; i < num_of_cols; i++) 11 | graph_info_alloc(&h->ginfo[i]); 12 | h->capacity = num_of_cols; 13 | } 14 | } 15 | 16 | void graph_header_dealloc(GraphFileHeader *h) 17 | { 18 | size_t i; 19 | for(i = 0; i < h->capacity; i++) 20 | graph_info_dealloc(&h->ginfo[i]); 21 | ctx_free(h->ginfo); 22 | memset(h, 0, sizeof(*h)); 23 | } 24 | 25 | void graph_header_print(const GraphFileHeader *header) 26 | { 27 | printf("HEADER\n"); 28 | printf(" version: %u\n", header->version); 29 | printf(" kmer_size: %u\n", header->kmer_size); 30 | printf(" num_of_bitfields: %u\n", header->num_of_bitfields); 31 | printf(" num_of_cols: %u\n", header->num_of_cols); 32 | printf(" [capacity: %zu]\n", header->capacity); 33 | } 34 | -------------------------------------------------------------------------------- /tests/vcfcov/calls4/pluto.fa: -------------------------------------------------------------------------------- 1 | >ref_ref 2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 3 | >ref_ref 4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG 5 | >ref_alt 6 | 
ACTATGtCCAAAGAGCAATACCCCCCGcTGACGGCTAGGTTGTTTTTCcgatCTCAAGACTCTATCCTGCGGgatCCGTTCCGCAGGCGTGCCCAacTCCGTACATTAATACCGTCGgGACTTACTTATTAAGCGTAGttactcttcATATTTCCGCTGGCCCTTACctagACCTAGTTTGTCCATAGAGCaG 7 | >ref_alt 8 | ACTATGtCCAAAGAGCAATACCCCCCGcTGACGGCTAGGTTGTTTTTCcgatCTCAAGACTCTATCCTGCGGgatCCGTTCCGCAGGCGTGCCCAacTCCGTACATTAATACCGTCGgGACTTACTTATTAAGCGTAGttactcttcATATTTCCGCTGGCCCTTACctagACCTAGTTTGTCCATAGAGCaG 9 | >ref_alt 10 | ACTATGtCCAAAGAGCAATACCCCCCGcTGACGGCTAGGTTGTTTTTCcgatCTCAAGACTCTATCCTGCGGgatCCGTTCCGCAGGCGTGCCCAacTCCGTACATTAATACCGTCGgGACTTACTTATTAAGCGTAGttactcttcATATTTCCGCTGGCCCTTACctagACCTAGTTTGTCCATAGAGCaG 11 | -------------------------------------------------------------------------------- /src/basic/decomp_breakpoint.h: -------------------------------------------------------------------------------- 1 | #ifndef DECOMP_BREAKPOINT_H_ 2 | #define DECOMP_BREAKPOINT_H_ 3 | 4 | #include "aligned_call.h" 5 | #include "call_file_reader.h" 6 | #include "seq_reader.h" // genome hash 7 | 8 | typedef struct { 9 | uint64_t nflanks_not_uniquely_mapped, nflanks_diff_chroms; 10 | uint64_t nflanks_diff_strands, nflanks_overlap_too_much; 11 | uint64_t ncalls, ncalls_mapped; 12 | } DecompBreakpointStats; 13 | 14 | typedef struct DecompBreakpointStruct DecompBreakpoint; 15 | 16 | DecompBreakpoint* decomp_brkpt_init(); 17 | void decomp_brkpt_destroy(DecompBreakpoint *bd); 18 | 19 | void decomp_brkpt_cpy_stats(DecompBreakpointStats *stats, 20 | const DecompBreakpoint *bd); 21 | 22 | // Convert a call into an aligned call 23 | // return 0 on success, otherwise non-zero on failure 24 | int decomp_brkpt_call(DecompBreakpoint *db, 25 | ChromHash *genome, size_t nsamples, 26 | const CallFileEntry *centry, 27 | AlignedCall *ac); 28 | 29 | #endif /* DECOMP_BREAKPOINT_H_ */ 30 | -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/freebayes/freebayes.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | set -o xtrace 5 | 6 | REF=/data2/users/turner/cortex_sims/klebsiella/kleb_pneumoniae/ref/GCF_000016305.1_ASM1630v1_genomic.fa 7 | BAM=/data2/users/turner/cortex_sims/klebsiella/kleb_pneumoniae/remap/mapped/KlebPneu.bam 8 | BAMRMDUP=/data2/users/turner/cortex_sims/klebsiella/kleb_pneumoniae/remap/mapped/KlebPneu.rmdup.bam 9 | 10 | CTXDIR=~/mccortex 11 | FREEBAYES=~/bioinf/freebayes/bin/freebayes 12 | BGZIP=$CTXDIR/libs/htslib/bgzip 13 | BCFTOOLS=$CTXDIR/libs/bcftools/bcftools 14 | 15 | $FREEBAYES -f $REF -p 1 $BAMRMDUP > freebayes.rmdup.vcf 16 | $BGZIP freebayes.rmdup.vcf 17 | $BCFTOOLS index freebayes.rmdup.vcf.gz 18 | 19 | $BCFTOOLS norm --check-ref x -m -any --fasta-ref $REF --site-win 5000 freebayes.rmdup.vcf.gz | \ 20 | $BCFTOOLS norm --rm-dup any --do-not-normalize | \ 21 | $VCF_PASS > freebayes.vcf 22 | $BGZIP freebayes.vcf 23 | $BCFTOOLS index freebayes.vcf.gz 24 | 25 | # Analysis 26 | rm -rf mummer_isec mapping_truth cortex.k31.k61.{mapping,isec}.log 27 | ./analysis.sh >& analysis.log 28 | -------------------------------------------------------------------------------- /results/benchmark/10diploid10X/smaller.fa: -------------------------------------------------------------------------------- 1 | >smaller 2 | TCATAACCCCCTGACCTATGAAACTTTGATTATCAAAGATACAAAGACCGAAGGTTATATCCGACGGGAAGCCGGAGGAG 3 | CGTTTGCCCTATAGTTACCTCTACCGAAATGGGATCACGAAGACTTGCGCGAGAAGCACGACGAGTTTTGTCTCGTGTTT 4 | TCATCGCAATGCAAGGGCTTTCTTCGGAGCTCTGCGTGCCTACTCTCGAAGTGCCACCTCCAGTTGGCTTCATCAGGTGT 5 | GCTACAGGCCTGGACCAGATGGTAGTAGAGGCAAAGGACTGAGTTTGGTTCGATCTAAGCGAAACTTAGATCCTTTACGG 6 | TGCTTGTAATCAGCTTCAGAGATATTGCCTTAACATCTTTGCCGAGGGCAGATACAGAGAAAAATGGATCTTTGAGATTA 7 | TGATGTTACGGCAGTACGTGCGGTCTTCAGATTCTCCGACAGATATACCATATTACGCCCTACTCGCTATAATAGGCGGT 8 | GTAAGATATACCGGAATCACAATCGGAGATGAACGAGTTACGTCGAAATGATTGTTATTATGTCCGTCCTCCACCCTAAT 9 | 
GGGATTTTTACCTACCCGACTGCAAGTTGGTCGTATGCGACGTTTTCTTTTCGCATTTAGACCCACCAGGGTACATTTAC 10 | AGATAGGGAGTTCCGTCCCTTCGCCTGCTCACGAAAATGCTCAGCCGTTTAATTATGCTGTTCATACTATGCACATTTTA 11 | AGAGCCATGTTTGGAGACCACGAGATACTCTTAATTCCGGTCCCAAGTTGAATCTGCTGTGTCCTTAACACACACCCGAA 12 | GGCATCAGGCCTCGGAGGCATGTACCAGTGCCACCTACCAAAGGTCTTGAAATCACCTTAGTTCAGTACGCTTGTAAAAA 13 | CATCACTTGATTCATCTCGGAATACTCACGTCGGGCGTGCGACACAAAACGTTCCATAGGAAGCACGCCTACGATTTAGC 14 | GAGTTTGGACAGCCCTAGACTATGTTATCCATGATTCGAC 15 | -------------------------------------------------------------------------------- /src/global/global.c: -------------------------------------------------------------------------------- 1 | #include "global.h" 2 | 3 | #include // for seeding random 4 | #include // getpid() 5 | 6 | #include "ctx_output.h" // ctx_output_init() 7 | 8 | #define strhash_fast_mix(h,x) ((h) * 37 + (x)) 9 | #define rotl32(h,r) ((h)<<(r)|(h)>>(32-(r))) 10 | 11 | static inline uint32_t get_rand_seed() 12 | { 13 | struct timeval now; 14 | gettimeofday(&now, NULL); 15 | 16 | uint32_t h = rand(); 17 | h = strhash_fast_mix(h, rotl32((uint32_t)now.tv_sec, h & 31)); 18 | h = strhash_fast_mix(h, rotl32((uint32_t)now.tv_usec, h & 31)); 19 | h = strhash_fast_mix(h, (uint32_t)getpid()); 20 | return h; 21 | } 22 | 23 | void seed_random() 24 | { 25 | uint32_t seed = get_rand_seed(); 26 | srand(seed); 27 | srand48(~seed); 28 | } 29 | 30 | void cortex_init() 31 | { 32 | seed_random(); 33 | // Cannot use die/warn/message/timestamp until we have completed setup 34 | ctx_output_init(); 35 | // Now safe to use die/warn/message/timestamp methods 36 | // since mutex and cmdcode have been set 37 | } 38 | 39 | void cortex_destroy() 40 | { 41 | ctx_output_destroy(); 42 | } 43 | -------------------------------------------------------------------------------- /scripts/calculations/bloom-filter-fpr.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | # 
Walking through 100 copies of Alu, assume ~300 kmers each 7 | # ~1 million copies Alu in the human genome 8 | 9 | my $alu_length = 300; 10 | my $n_copies = 100; 11 | 12 | # Power in perl is ** 13 | my $k = 3; 14 | my $m = 2**22 * 8; # 4MB, 33.6 million bits 15 | my $success = 1; 16 | my $num_rep_kmers = $n_copies*$alu_length; 17 | my $false_pos_rate; 18 | 19 | for(my $i = 0; $i < $num_rep_kmers; $i++) { 20 | $false_pos_rate = bloom_false_pos($k,$m,$i); 21 | $success *= (1-$false_pos_rate); 22 | } 23 | 24 | # Complete => traversing $num_rep_kmers without a false positive 25 | print "$k hash functions; $m bits;\n"; 26 | print "$num_rep_kmers false positive rate: $false_pos_rate\n"; 27 | print "complete success rate: $success\n"; 28 | print "complete failure rate: ".(1-$success)."\n"; 29 | 30 | # k is the number of hash functions 31 | # m is the number of total bits 32 | # n is the number of elements (kmers) inserted so far; returns (1 - e^(-k*n/m))^k 33 | sub bloom_false_pos 34 | { 35 | my ($k,$m,$n) = @_; 36 | return (1 - exp(1)**(-$k * $n / $m))**$k; 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/basic/hash.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_H_ 2 | #define HASH_H_ 3 | 4 | // Hash functions 5 | #if defined(USE_CITY_HASH) 6 | // Use Google's CityHash 7 | #include "misc/city.h" 8 | #define HASH_NAME_STR "CityHash32" 9 | #define ctx_hash32(src,n,rehash) ((uint32_t)CityHash64WithSeed((char*)(src), (n), (rehash))) 10 | #define ctx_hash64(src,n,rehash) CityHash64WithSeed((char*)(src), (n), (rehash)) 11 | #elif defined(USE_XXHASH) 12 | // Use xxHash 13 | #include "xxHash/xxhash.h" 14 | #define HASH_NAME_STR "xxHash32" 15 | #define ctx_hash32(src,n,rehash) XXH32((src), (n), (rehash)) 16 | #define ctx_hash64(src,n,rehash) XXH64((src), (n), (rehash)) 17 | #else 18 | // Use Bob Jenkin's lookup3 19 | #include "misc/lookup3.h" 20 | #define HASH_NAME_STR "Lookup3" 21 | #define ctx_hash32(src,n,rehash)
lk3_hashlittle((src), (n), (rehash)) 22 | 23 | static inline uint64_t ctx_hash64(void *ptr, size_t n, uint64_t init) 24 | { 25 | uint32_t a = init>>32, b = init; 26 | lk3_hashlittle2(ptr, n, &a, &b); // note: `a` slightly better mixed than `b` 27 | return (((uint64_t)b<<32) | a); 28 | } 29 | 30 | #endif 31 | 32 | #endif /* HASH_H_ */ 33 | -------------------------------------------------------------------------------- /tests/pjoin/pjoin1/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Sanity check that merging matching link files gives correct counts 3 | # 4 | 5 | SHELL:=/bin/bash -euo pipefail 6 | 7 | K=7 8 | CTXDIR=../../.. 9 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1]) 10 | CTXPIPELINE=$(CTXDIR)/scripts/make-pipeline.pl 11 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 12 | 13 | REFLEN=5000 14 | 15 | TGTS=genome0.fa genome0.k$(K).ctx genome0.k$(K).ctp.gz \ 16 | genome1.fa genome1.k$(K).ctx genome1.k$(K).ctp.gz \ 17 | joint.k$(K).ctp.gz 18 | 19 | all: joint.k$(K).ctp.gz 20 | 21 | genome%.fa: 22 | $(DNACAT) -n $(REFLEN) -M <(echo ref) -F > $@ 23 | 24 | genome%.k$(K).ctx: genome%.fa 25 | $(MCCORTEX) build -q -k $(K) --sample Genome0 -1 $< $@ 26 | 27 | genome%.k$(K).ctp.gz: genome%.k$(K).ctx genome%.fa 28 | $(MCCORTEX) thread -q -o $@ -1 genome$*.fa genome$*.k$(K).ctx 29 | 30 | joint.k$(K).ctp.gz: genome0.k$(K).ctp.gz genome1.k$(K).ctp.gz 31 | $(MCCORTEX) pjoin -q -n 1M -o $@ genome0.k$(K).ctp.gz genome0.k$(K).ctp.gz genome1.k$(K).ctp.gz genome0.k$(K).ctp.gz genome1.k$(K).ctp.gz 32 | 33 | clean: 34 | rm -rf $(TGTS) 35 | 36 | .PHONY: all clean 37 | -------------------------------------------------------------------------------- /tests/graphviz/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | K=15 4 | CTXDIR=../.. 
5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl 7 | 8 | all: seq.k$(K).ctx seq.k$(K).unitigs.dot seq.k$(K).kmers.dot 9 | 10 | plots: seq.k$(K).unitigs.pdf seq.k$(K).kmers.pdf 11 | 12 | seq.fa: 13 | echo TACATTCCCCCATAGTCGTAGGCGTTAAATACA > seq.fa 14 | echo TACATTCCCCCATAGTAGTAGGCGTTAAATACA >> seq.fa 15 | echo GGCGTTCGCTTATCCGGATAAGCGAACGCC >> seq.fa 16 | echo ATATATATATATATATATATATATATATATATATAT >> seq.fa 17 | echo ACTTCTTCGAAAAAAAAAAAAAAATACTGGCCCTAACTTCTTCGAAAAAA >> seq.fa 18 | 19 | seq.k$(K).ctx: seq.fa 20 | $(MCCORTEX) build -q -k $(K) --sample MasterSeq --seq seq.fa seq.k$(K).ctx 21 | 22 | seq.k$(K).unitigs.dot: seq.k$(K).ctx 23 | $(MCCORTEX) unitigs -q --dot $< > $@ 24 | 25 | seq.k$(K).kmers.dot: seq.k$(K).ctx 26 | $(CTX2DOT) $< > $@ 27 | 28 | %.pdf: %.dot 29 | cat $< | dot -Tpdf > $@ 30 | 31 | clean: 32 | rm -rf seq.fa seq.k$(K).ctx 33 | rm -rf seq.k$(K).unitigs.dot seq.k$(K).kmers.dot 34 | rm -rf seq.k$(K).unitigs.pdf seq.k$(K).kmers.pdf 35 | 36 | .PHONY: all clean plots 37 | -------------------------------------------------------------------------------- /tests/pjoin/pjoin0/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | K=9 4 | CTXDIR=../../.. 
5 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1]) 6 | 7 | LINKS=paths.0.k$(K).ctp.gz paths.1.k$(K).ctp.gz 8 | SEQ=genome.0.fa genome.1.fa 9 | GRAPHS=$(SEQ:.fa=.k$(K).ctx) 10 | MERGED=genomes.k$(K).ctx genomes.k$(K).ctp.gz 11 | 12 | TGTS=$(SEQ) $(GRAPHS) $(LINKS) $(MERGED) 13 | 14 | # non-default target: genome.k$(K).pdf 15 | 16 | all: $(TGTS) 17 | 18 | clean: 19 | rm -rf $(TGTS) 20 | 21 | genome.0.fa: 22 | echo TGGTGTCGCCTACA > $@ 23 | echo TtGTGTCGCCTAgA >> $@ 24 | 25 | genome.1.fa: 26 | echo TtGTGTCGCCTACA > $@ 27 | echo TGGTGTCGCCTAgA >> $@ 28 | 29 | genome.%.k$(K).ctx: genome.%.fa 30 | $(MCCORTEX) build -q -m 1M -k $(K) --sample Gnome$* --seq genome.$*.fa $@ 31 | 32 | paths.%.k$(K).ctp.gz: genome.%.k$(K).ctx genome.%.fa 33 | $(MCCORTEX) thread -q -m 1M --seq genome.$*.fa -o $@ genome.$*.k$(K).ctx 34 | gunzip -c $@ 35 | 36 | genomes.k$(K).ctx: $(LINKS) 37 | $(MCCORTEX) join -q -o $@ $(GRAPHS) 38 | 39 | genomes.k$(K).ctp.gz: $(LINKS) 40 | $(MCCORTEX) pjoin -q -o $@ $(LINKS) 41 | gunzip -c $@ 42 | 43 | .PHONY: all plots clean 44 | -------------------------------------------------------------------------------- /libs/cJSON/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 Dave Gamble 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Isaac Turner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/threading/threading1/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | K=31 4 | CTXDIR=../../.. 5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl 7 | 8 | GRAPHS=ref.k$(K).ctx ref.ctp.gz 9 | LOGS=$(addsuffix .log,$(GRAPHS)) 10 | PLOTS=ref.pdf ref.dot 11 | TGTS=ref.fa $(GRAPHS) 12 | 13 | all: $(TGTS) 14 | 15 | plots: ref.dot ref.pdf 16 | 17 | ref.fa: 18 | echo CGATTGAATTCCACCGATAATGCAGATGTGAGCCTCAGCATCTACTGCTTCCTCGTCGTCGGGGACTTTTGTTGACCTACCACATGATACATGCGGCCAT > $@ 19 | echo ACAAGCTAAAGAAGCTAGCCAGTGCAGGCTCCCTTCAGCATCTACTGCTTCCTCGTCGTCGGGGACTAGAAACGTGACCATCGGCCACCGAAAGATAAGG >> $@ 20 | echo ACAAGCTAAAGAAGCTAGCCAGTGCAGGCTCCCTTCAGCATCTACTGCTGGCTCGTCGTCGGGGACTAGAAACGTGACCATCGGCCACCGAAAGATAAGG >> $@ 21 | 22 | ref.k$(K).ctx: ref.fa 23 | $(MCCORTEX) build -m 1M -k $(K) --sample MsSample --seq ref.fa ref.k$(K).ctx >& $@.log 24 | 25 | ref.ctp.gz: ref.k$(K).ctx 26 | $(MCCORTEX) thread -m 1M --seq ref.fa -o $@ $< >& $@.log 27 | 28 | ref.dot: ref.k$(K).ctx 29 | $(CTX2DOT) --simplify $< > $@ 30 | 31 | ref.pdf: ref.dot 32 | dot -Tpdf $< > $@ 33 | 34 | clean: 35 | rm -rf $(TGTS) $(PLOTS) $(LOGS) 36 | 37 | .PHONY: all plots clean 38 | -------------------------------------------------------------------------------- /tests/vcfcov/calls4/truth.cov.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##fileDate=20151014 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##FORMAT= 7 | ##FORMAT= sum(kmer_covs)/exp_num_kmers"> 8 | ##FORMAT= sum(kmer_covs)/exp_num_kmers"> 9 | ##mccortex_5a56358= 10 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT pluto 11 | ref 7 . G T . PASS . K21R:K21A 2:3 12 | ref 28 . A C . PASS . K21R:K21A 2:3 13 | ref 49 . GG CGAT . PASS . K21R:K21A 2:3 14 | ref 71 .
A GAT . PASS . K21R:K21A 2:3 15 | ref 92 . GCACCAGGG AC . PASS . K21R:K21A 2:3 16 | ref 121 . C G . PASS . K21R:K21A 2:3 17 | ref 142 . GGCACAGCA TTACTCTTC . PASS . K21R:K21A 2:3 18 | ref 171 . A CTAG . PASS . K21R:K21A 2:3 19 | ref 192 . CATCATAG A . PASS . K21R:K21A 2:3 20 | -------------------------------------------------------------------------------- /scripts/make-isec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Exit immediately if a command exits with a non-zero status. 4 | set -euo pipefail 5 | 6 | if [[ $# -ne 3 ]] 7 | then 8 | echo "usage: $0 " 1>&2 9 | echo " Create tmp dir and use it to count intersection of indexed VCFs" 1>&2 10 | exit -1 11 | fi 12 | 13 | TMPDIR="$1" 14 | TRUTHVCF="$2" 15 | RESULTSVCF="$3" 16 | 17 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 18 | CTXDIR="$DIR/.." 19 | 20 | BCFTOOLS=$CTXDIR"/libs/bcftools/bcftools" 21 | VCFALLELES=$CTXDIR"/libs/biogrok/vcf-count-alleles" 22 | 23 | $BCFTOOLS isec $TRUTHVCF $RESULTSVCF -p $TMPDIR 24 | 25 | MISSED=`$VCFALLELES $TMPDIR/0000.vcf` 26 | FP=`$VCFALLELES $TMPDIR/0001.vcf` 27 | FOUND=`$VCFALLELES $TMPDIR/0002.vcf` 28 | NCALLED=`$VCFALLELES $RESULTSVCF` 29 | NTRUTH=`$VCFALLELES $TRUTHVCF` 30 | 31 | awk 'BEGIN{printf("Missed: %4d / %4d (%5.2f%%)\n",'$MISSED','$NTRUTH',100*'$MISSED'/'$NTRUTH')}' 32 | awk 'BEGIN{printf("FP: %4d / %4d (%5.2f%%)\n",'$FP','$NCALLED',100*'$FP'/'$NCALLED')}' 33 | awk 'BEGIN{printf("Found: %4d / %4d (%5.2f%%)\n",'$FOUND','$NTRUTH',100*'$FOUND'/'$NTRUTH')}' 34 | 35 | echo "remember to delete temp dir: $TMPDIR" 1>&2 36 | -------------------------------------------------------------------------------- /src/tools/vcf_coverage.h: -------------------------------------------------------------------------------- 1 | #ifndef VCF_COVERAGE_H_ 2 | #define VCF_COVERAGE_H_ 3 | 4 | #include "db_graph.h" 5 | 6 | #include "htslib/vcf.h" 7 | #include "htslib/faidx.h" 8 | 9 | #define DEFAULT_MAX_ALLELE_LEN 100 
10 | #define DEFAULT_MAX_GT_VARS 8 11 | 12 | typedef struct { 13 | // Stats 14 | uint64_t nvcf_lines, nalts_read, nalts_loaded; 15 | uint64_t nalts_too_long, nalts_no_covg, nalts_with_covg; 16 | uint64_t ngt_kmers; 17 | } VcfCovStats; 18 | 19 | typedef struct { 20 | const char *kcov_ref_tag, *kcov_alt_tag; 21 | // Don't attempt to genotype alleles bigger than this 22 | // defaults to DEFAULT_MAX_ALLELE_LEN 23 | uint32_t max_allele_len; 24 | // 2^8 = 256 possible haplotypes 25 | // defaults to DEFAULT_MAX_GT_VARS 26 | uint32_t max_gt_vars; 27 | bool load_kmers_only; 28 | } VcfCovPrefs; 29 | 30 | void vcfcov_file(htsFile *vcffh, bcf_hdr_t *vcfhdr, 31 | htsFile *outfh, bcf_hdr_t *outhdr, 32 | const char *path, faidx_t *fai, 33 | const size_t *samplehdrids, 34 | const VcfCovPrefs *prefs, 35 | VcfCovStats *stats, 36 | dBGraph *db_graph); 37 | 38 | #endif /* VCF_COVERAGE_H_ */ 39 | -------------------------------------------------------------------------------- /tests/vcfcov/calls0/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # 4 | # Test vcfcov with an empty VCF 5 | # Works with any kmer size (K) 6 | # 7 | 8 | K=21 9 | CTXDIR=../../.. 
10 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 11 | VCFNALLELES=$(CTXDIR)/libs/biogrok/vcf-count-alleles 12 | 13 | REF=../ref/ref.fa 14 | 15 | VCFS=calls.cov.k$(K).vcf lowmem.cov.k$(K).vcf 16 | GRAPH=graph.k$(K).ctx 17 | LOGS=$(VCFS:=.log) $(GRAPH:=.log) 18 | 19 | all: test 20 | 21 | clean: 22 | rm -rf $(VCFS) $(LOGS) $(GRAPH) 23 | 24 | calls.cov.k$(K).vcf: $(REF) calls.vcf $(GRAPH) 25 | $(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --high-mem calls.vcf $(GRAPH) >& $@.log 26 | 27 | lowmem.cov.k$(K).vcf: $(REF) calls.vcf $(GRAPH) 28 | $(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --low-mem calls.vcf graph.k$(K).ctx >& $@.log 29 | 30 | graph.k$(K).ctx: 31 | $(MCCORTEX) build -k $(K) \ 32 | --sample John --seq <(echo '') \ 33 | --sample Jane --seq <(echo '') \ 34 | $@ >& $@.log 35 | 36 | test: $(VCFS) 37 | [[ `$(VCFNALLELES) calls.cov.k$(K).vcf` -eq 0 ]] 38 | [[ `$(VCFNALLELES) lowmem.cov.k$(K).vcf` -eq 0 ]] 39 | @echo "=> Empty VCF works." 40 | 41 | view: calls.cov.k$(K).vcf 42 | gzip -fcd $< 43 | 44 | .PHONY: all clean view test 45 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Travis CI configuration 2 | # Note: there are currently issues with limiting coverity to a single compiler 3 | # see https://github.com/travis-ci/travis-ci/issues/1975 4 | # Instead we use a script to decide if we should do anything 5 | 6 | dist: trusty 7 | language: c 8 | compiler: 9 | - clang 10 | - gcc 11 | os: 12 | - linux 13 | - osx 14 | 15 | install: 16 | - ./travis/install.sh 17 | - eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)" 18 | 19 | script: ./travis/script.sh 20 | 21 | env: 22 | global: 23 | # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created 24 | # via the "travis encrypt" command using the project repo's public key 25 | - secure: 
"T08ccfH7+agMchVPhAP/MTdVeFonkjlLvbY8nv/jQ5aZeeWP2i1Oop59MVtuaA4Vw1Ickjr1czLsHGob2OyMHwo2otEasMqacvOw38exblvjOptqN7dx2yu0qzBZdHxLd/uOad7HMfgtkVSwgpodeNR6+K+4LcCM7J6+iAWtLME=" 26 | 27 | addons: 28 | coverity_scan: 29 | project: 30 | name: "mcveanlab/mccortex" 31 | description: Build submitted via Travis CI 32 | notification_email: turner.isaac@gmail.com 33 | build_command_prepend: git submodule update --init --recursive 34 | build_command: make 35 | branch_pattern: coverity_scan 36 | -------------------------------------------------------------------------------- /tests/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | # 6 | # Run all the tests! 7 | # Isaac Turner 8 | # 2014-07-16 9 | # 10 | # cd into each directory and run `make` 11 | # 12 | 13 | if [[ ( $# -gt 1 ) || ( $# -eq 1 && $1 != 'noupdate' && $1 != 'update' ) ]] 14 | then 15 | echo "./run [update|noupdate]" 16 | exit -1 17 | fi 18 | 19 | cwd=`pwd` 20 | echo $cwd 21 | 22 | if [[ $# -eq 0 || $1 == 'update' ]] 23 | then 24 | # Get all dependencies used in testing (bioinf-perl, bcftools, samtools etc.) 25 | cd ../libs && make all && cd $cwd 26 | if [ $? -ne 0 ]; then exit -1; fi 27 | fi 28 | 29 | # Run cortex unit tests 30 | cd .. 31 | for k in 31 63 95 127 32 | do 33 | make test MAXK=$k STRICT=1 34 | done 35 | cd $cwd 36 | 37 | # Get list of current tests (all directories except 'old') 38 | dirs=`ls | grep -v '.*run.sh' | grep -v '^\.' | grep -v old` 39 | echo $dirs 40 | 41 | cd .. && make MAXK=31 RELEASE=1 && make MAXK=63 && cd $cwd 42 | if [ $? -ne 0 ]; then exit -1; fi 43 | 44 | for f in $dirs 45 | do 46 | echo && echo ===== && echo "Test: $cwd/$f" 47 | cd $f && make clean && make all && cd .. 48 | if [ $? -ne 0 ]; then exit -1; fi 49 | done 50 | 51 | echo $dirs 52 | echo All tests completed. 
53 | -------------------------------------------------------------------------------- /scripts/perl/bubbles-example.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use File::Basename; 7 | 8 | # Use current directory to find modules 9 | use FindBin; 10 | use lib $FindBin::Bin; 11 | 12 | use McCortexBubbles; 13 | 14 | sub print_usage 15 | { 16 | for my $err (@_) { print STDERR "Error: $err\n"; } 17 | 18 | print STDERR "" . 19 | "Usage: $0 \n"; 20 | 21 | exit(-1); 22 | } 23 | 24 | if(@ARGV > 1) { print_usage(); } 25 | my ($file) = (@ARGV, "-"); 26 | my $fh; 27 | open($fh, "gzip -fcd $file |") or die("Cannot read file $file: $!"); 28 | 29 | my $cb = new McCortexBubbles($fh); 30 | my ($seq5p, $seq3p, $branches, $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid); 31 | 32 | while(1) 33 | { 34 | ($seq5p, $seq3p, $branches, 35 | $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid) = $cb->next(); 36 | if(!defined($seq5p)) { last; } 37 | 38 | print "BUBBLE $callid\n"; 39 | print ">flank5p $flank5p_nkmers nkmers=$flank5p_nkmers\n$seq5p\n"; 40 | print ">flank3p $flank3p_nkmers nkmers=$flank3p_nkmers\n$seq3p\n"; # 3' flank header reports the 3' kmer count 41 | print "". join('', map {">branch$_ nkmers=$branchlens->[$_]\n$branches->[$_]\n"} 0..(@$branches-1)); 42 | print "\n"; 43 | } 44 | 45 | close($fh); 46 | -------------------------------------------------------------------------------- /src/paths/gpath_follow.h: -------------------------------------------------------------------------------- 1 | #ifndef GPATH_FOLLOW_H_ 2 | #define GPATH_FOLLOW_H_ 3 | 4 | #include "dna.h" 5 | #include "gpath.h" 6 | 7 | /* 8 | 9 | Link 'age' vs 'pos' 10 | 11 | \ \ / / 12 | ___\__\__/___X__/_ 13 | a ^--------------> 14 | b ^-----------> 15 | 16 | Links start at (a) and at (b). At (X) both links have pos=1, but link (a) has 17 | age=2 and (b) has age=1.
18 | 19 | */ 20 | 21 | // This struct is packed so we can hash it quickly 22 | struct GPathFollowStruct 23 | { 24 | const GPath *gpath; 25 | uint16_t pos, len; 26 | uint32_t age; // age is >= pos 27 | // A small buffer of upcoming 24 bases 28 | // uint16_t first_cached; // first base in buffer (multiple of 4: 0,4,8,...) 29 | // uint8_t cache[6]; // first..first+23 (24 bases) 30 | } __attribute__((packed)); 31 | 32 | typedef struct GPathFollowStruct GPathFollow; 33 | 34 | #include "madcrowlib/madcrow_buffer.h" 35 | madcrow_buffer(gpath_follow_buf,GPathFollowBuffer,GPathFollow); 36 | 37 | #define gpath_follow_get_base(path,pos) (binary_seq_get((path)->gpath->seq,pos)) 38 | // Nucleotide gpath_follow_get_base(GPathFollow *path, size_t pos); 39 | GPathFollow gpath_follow_create(const GPath *gpath); 40 | 41 | #endif /* GPATH_FOLLOW_H_ */ 42 | -------------------------------------------------------------------------------- /src/basic/decomp_bubble.h: -------------------------------------------------------------------------------- 1 | #ifndef DECOMP_BUBBLE_H_ 2 | #define DECOMP_BUBBLE_H_ 3 | 4 | #include "aligned_call.h" 5 | #include "call_file_reader.h" 6 | #include "seq_reader.h" // genome hash 7 | 8 | typedef struct { 9 | uint64_t nflank5p_unmapped, nflank5p_lowqual; 10 | uint64_t nflank3p_multihits, nflank3p_not_found; 11 | uint64_t nflank3p_exact_found, nflank3p_approx_found; 12 | uint64_t nflanks_overlap_too_much; 13 | uint64_t ncalls, ncalls_mapped; 14 | } DecompBubbleStats; 15 | 16 | typedef struct DecompBubbleStruct DecompBubble; 17 | 18 | DecompBubble* decomp_bubble_init(); 19 | void decomp_bubble_destroy(DecompBubble *db); 20 | 21 | void decomp_bubble_cpy_stats(DecompBubbleStats *stats, const DecompBubble *db); 22 | scoring_t* decomp_bubble_get_scoring(DecompBubble *db); 23 | 24 | // Convert a call into an aligned call 25 | // return 0 on success, otherwise non-zero on failure 26 | int decomp_bubble_call(DecompBubble *db, ChromHash *genome, 27 | size_t 
kmer_size, size_t min_mapq, 28 | const CallFileEntry *centry, 29 | const bam1_t *mflank, const bam_hdr_t *bhdr, 30 | AlignedCall *ac); 31 | 32 | #endif /* DECOMP_BUBBLE_H_ */ 33 | -------------------------------------------------------------------------------- /src/tools/generate_paths.h: -------------------------------------------------------------------------------- 1 | #ifndef GENERATE_PATHS_H_ 2 | #define GENERATE_PATHS_H_ 3 | 4 | #include "seq_file/seq_file.h" 5 | 6 | #include "cortex_types.h" 7 | #include "db_graph.h" 8 | #include "seq_loading_stats.h" 9 | #include "correct_aln_input.h" 10 | 11 | typedef struct GenPathWorker GenPathWorker; 12 | 13 | // Estimate memory required per worker thread 14 | size_t gen_paths_worker_est_mem(const dBGraph *db_graph); 15 | 16 | GenPathWorker* gen_paths_workers_alloc(size_t n, dBGraph *graph); 17 | 18 | void gen_paths_workers_dealloc(GenPathWorker *mem, size_t n); 19 | 20 | // Add a single contig using a given worker 21 | void gen_paths_worker_seq(GenPathWorker *wrkr, AsyncIOData *data, 22 | const CorrectAlnInput *task); 23 | 24 | // For testing 25 | void gen_paths_from_str_mt(GenPathWorker *gen_path_wrkr, char *seq, 26 | CorrectAlnParam params); 27 | 28 | // workers array must be at least as long as tasks 29 | void generate_paths(CorrectAlnInput *tasks, size_t num_tasks, 30 | GenPathWorker *workers, size_t num_workers); 31 | 32 | CorrectAlnStats* gen_paths_get_aln_stats(GenPathWorker *wrkr); 33 | SeqLoadingStats* gen_paths_get_stats(GenPathWorker *wrkr); 34 | 35 | #endif /* GENERATE_PATHS_H_ */ 36 | -------------------------------------------------------------------------------- /tests/clean_graph/clean2/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | K=17 4 | CTXDIR=../../.. 
5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl 7 | 8 | GRAPHS=seq.k$(K).raw.ctx seq.k$(K).clean.ctx 9 | DOTS=$(GRAPHS:.ctx=.dot) $(GRAPHS:.ctx=.unitigs.dot) 10 | PLOTS=$(DOTS:.dot=.pdf) 11 | 12 | all: seq.fa $(GRAPHS) $(DOTS) 13 | 14 | plots: $(PLOTS) 15 | 16 | seq.fa: Makefile 17 | # Repeat of CAAAGGCCTCACGGGTA 18 | echo GTGAGGCCAAGCAAAGGCCTCACGGGTACAAAGGCCTCACGGGTAGAATCCCCTTTG > seq.fa 19 | echo GTGAGGCCAAGCAAAGGCCTCACGGGTAGAATCCCCTTTG >> seq.fa 20 | echo GTGAGGCCAAGCAAAGGCCTCACGGGTAGAATCCCCTTTG >> seq.fa 21 | echo AAAAAAAAAAAAAAAAATAAAAAAAAAAAAAAAAA >> seq.fa 22 | 23 | seq.k$(K).raw.ctx: seq.fa 24 | $(MCCORTEX) build -q -m 10M -k $(K) --sample SeqJr --seq $< $@ 25 | $(MCCORTEX) check -q $@ 26 | 27 | seq.k$(K).clean.ctx: seq.k$(K).raw.ctx 28 | $(MCCORTEX) clean -q --unitigs=2 -o $@ $< 29 | $(MCCORTEX) check -q $@ 30 | 31 | %.unitigs.dot: %.ctx 32 | $(MCCORTEX) unitigs -q -m 1M --dot $< > $@ 33 | # $(CTX2DOT) --simplify $< > $@ 34 | 35 | %.dot: %.ctx 36 | $(CTX2DOT) $< > $@ 37 | 38 | %.pdf: %.dot 39 | dot -Tpdf $< > $@ 40 | 41 | clean: 42 | rm -rf seq.fa $(GRAPHS) $(DOTS) $(PLOTS) 43 | 44 | .PHONY: all plots clean 45 | -------------------------------------------------------------------------------- /tests/breakpoint/breakpoint0/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Sanity check that calling breakpoints with only a ref and reads from the ref 3 | # gives no calls 4 | # 5 | 6 | SHELL:=/bin/bash -euo pipefail 7 | 8 | CTXDIR=../../.. 
9 | CTXPIPELINE=$(CTXDIR)/scripts/make-pipeline.pl 10 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 11 | READSIM=$(CTXDIR)/libs/readsim/readsim 12 | VCFCOUNT=$(CTXDIR)/libs/biogrok/vcf-count 13 | 14 | REFLEN=1000 15 | K=31 16 | SEQDEPTH=30 17 | READLEN=100 18 | OUTDIR=proj 19 | 20 | all: run 21 | 22 | ref.fa: 23 | $(DNACAT) -n $(REFLEN) -M <(echo ref) -F > $@ 24 | 25 | reads/reads.fa.gz: ref.fa 26 | mkdir -p reads 27 | $(READSIM) -r ref.fa -l $(READLEN) -s -d $(SEQDEPTH) reads/reads 28 | 29 | task.k$(K).mk: 30 | echo "RefReads reads/reads.fa.gz" | $(CTXPIPELINE) -r ref.fa $(K) proj - > $@ 31 | 32 | run: task.k$(K).mk reads/reads.fa.gz ref.fa 33 | $(MAKE) -f $< CTXDIR=$(CTXDIR) breakpoints-vcf 34 | @# Check no VCF entries 35 | (( `$(VCFCOUNT) proj/vcfs/breakpoints.joint.links.k$(K).vcf.gz` == 0 )) || false 36 | @# Check no breakpoint call entries 37 | (( `grep -c '>brkpnt' proj/k$(K)/breakpoints_links/joint.brk.gz` == 0 )) || false 38 | @echo 'Success: no breakpoint calls or VCF entries!' 39 | 40 | clean: 41 | rm -rf ref.fa* reads proj task.k$(K).mk 42 | 43 | .PHONY: all run clean 44 | -------------------------------------------------------------------------------- /tests/pop_bubbles/pop_bubbles1/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Test pop bubbles with a single sample input file 4 | 5 | K=21 6 | CTXDIR=../../.. 
7 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1]) 8 | 9 | SEQS=seq.fa truth.fa 10 | GRAPHS=seq.ctx popped.ctx truth.ctx 11 | 12 | all: popped.ctx truth.ctx check 13 | 14 | seq.fa: 15 | ( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \ 16 | echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \ 17 | echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAcATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@ 18 | 19 | truth.fa: 20 | ( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \ 21 | echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \ 22 | echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGA; echo ATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@ 23 | 24 | %.ctx: %.fa 25 | $(MCCORTEX) build -q -k $(K) --sample $* --seq $< $@ 26 | 27 | popped.ctx: seq.ctx 28 | $(MCCORTEX) popbubbles -q --out $@ $< 29 | 30 | check: truth.ctx popped.ctx 31 | diff -q <($(MCCORTEX) view -qk popped.ctx | sort) <($(MCCORTEX) view -qk truth.ctx | sort) && \ 32 | echo "Kmers match." 33 | 34 | clean: 35 | rm -rf $(SEQS) $(GRAPHS) 36 | 37 | .PHONY: all clean check 38 | -------------------------------------------------------------------------------- /scripts/perl/bubbles-to-contigs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use File::Basename; 7 | 8 | # Use current directory to find modules 9 | use FindBin; 10 | use lib $FindBin::Bin; 11 | 12 | use McCortexBubbles; 13 | 14 | sub print_usage 15 | { 16 | for my $err (@_) { print STDERR "Error: $err\n"; } 17 | 18 | print STDERR "" . 19 | "Usage: $0 \n" . 20 | " Print a contig for each bubble branch. Prints to STDOUT.\n" . 21 | " Contigs are named: >BUBBLENAME.branchB\n" . 
22 | " where B is the branch number\n"; 23 | 24 | exit(-1); 25 | } 26 | 27 | if(@ARGV != 1) { print_usage(); } 28 | my ($file) = (@ARGV,"-"); 29 | my $fh; 30 | open($fh, "gzip -fcd $file |") or die("Cannot read file $file: $!"); 31 | 32 | my $cb = new McCortexBubbles($fh); 33 | my ($seq5p, $seq3p, $branches, $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid); 34 | 35 | while(1) 36 | { 37 | ($seq5p, $seq3p, $branches, 38 | $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid) = $cb->next(); 39 | if(!defined($seq5p)) { last; } 40 | 41 | my ($len5p,$len3p) = (length($seq5p), length($seq3p)); 42 | 43 | for(my $i = 0; $i < @$branches; $i++) { 44 | print ">$callid.branch$i:$len5p:$len3p\n"; 45 | print $seq5p.$branches->[$i].$seq3p."\n"; 46 | } 47 | } 48 | 49 | close($fh); 50 | -------------------------------------------------------------------------------- /travis/provision-vm.sh: -------------------------------------------------------------------------------- 1 | sudo apt-get update 2 | sudo apt-get install -y g++ libncurses5-dev python-dev python3-dev emacs cmake autoconf 3 | 4 | # Stampy 5 | cd 6 | curl -O http://www.well.ox.ac.uk/~gerton/software/Stampy/stampy-latest.tgz 7 | tar xfz stampy-latest.tgz 8 | cd stampy 9 | make 10 | 11 | # VCFTools 12 | cd 13 | wget https://downloads.sourceforge.net/project/vcftools/vcftools_0.1.13.tar.gz 14 | tar xfz vcftools_0.1.13.tar.gz 15 | cd vcftools_0.1.13 16 | make 17 | 18 | # Cortex 19 | cd 20 | git clone --recursive https://github.com/iqbal-lab/cortex.git 21 | cd cortex 22 | bash install.sh 23 | for k in 31 63 95 127; do 24 | for ncol in 1 2 3 9 10 11; do 25 | make cortex_var MAXK=$k NCOLS=$ncol 26 | done 27 | done 28 | echo 'export PERL5LIB="${HOME}/cortex/scripts/analyse_variants/bioinf-perl/lib/:${HOME}/cortex/scripts/calling/:${PERL5LIB}"' >> .profile 29 | echo 'export PATH="${HOME}/cortex/scripts/analyse_variants/needleman_wunsch/:${PATH}"' >> .profile 30 | 31 | # McCortex 32 | cd 33 | git clone --recursive -b 
develop https://github.com/mcveanlab/mccortex.git 34 | cd mccortex 35 | cd libs && make all && cd .. 36 | for k in 31 63 95 127; do 37 | make all test MAXK=31 38 | done 39 | 40 | # Freebayes 41 | cd 42 | git clone --recursive https://github.com/ekg/freebayes.git 43 | cd freebayes 44 | make 45 | -------------------------------------------------------------------------------- /scripts/perl/breakpoints-example.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use File::Basename; 7 | 8 | # Use current directory to find modules 9 | use FindBin; 10 | use lib $FindBin::Bin; 11 | 12 | use McCortexBreakpoints; 13 | 14 | sub print_usage 15 | { 16 | for my $err (@_) { print STDERR "Error: $err\n"; } 17 | 18 | print STDERR "" . 19 | "Usage: $0 \n"; 20 | 21 | exit(-1); 22 | } 23 | 24 | if(@ARGV > 1) { print_usage(); } 25 | my ($file) = (@ARGV, "-"); 26 | my $fh; 27 | open($fh, "gzip -fcd $file |") or die("Cannot read file $file: $!"); 28 | 29 | my $cb = new McCortexBreakpoints($fh,$file); 30 | my ($seq5p, $seq3p, $pathseq, $flank5p_refs, $flank3p_refs, $cols, $callid); 31 | 32 | while(1) 33 | { 34 | ($seq5p, $seq3p, $pathseq, $flank5p_refs, $flank3p_refs, $cols, $callid) = $cb->next(); 35 | if(!defined($seq5p)) { last; } 36 | 37 | my @strs5p = map {$_->{'chrom'}.":".$_->{'start'}.'-'.$_->{'end'}} @$flank5p_refs; 38 | my @strs3p = map {$_->{'chrom'}.":".$_->{'start'}.'-'.$_->{'end'}} @$flank3p_refs; 39 | 40 | print "$callid\n"; 41 | print ">flank5p chrs=".join(',', @strs5p)."\n$seq5p\n"; 42 | print ">flank3p chrs=".join(',', @strs3p)."\n$seq3p\n"; 43 | print ">path cols=".join(',', @$cols)."\n$pathseq\n"; 44 | print "\n"; 45 | } 46 | 47 | close($fh); 48 | -------------------------------------------------------------------------------- /src/graph/graph_step.c: -------------------------------------------------------------------------------- 1 | #include "global.h" 2 | #include 
"graph_step.h" 3 | #include "util.h" 4 | 5 | /* 6 | This file contains the struct and constants used to record the behaviour of 7 | the GraphWalker at each "step". 8 | */ 9 | 10 | const char *graph_step_str[GRPHWLK_NUM_STATES] = {GRPHWLK_POPFWD_STR, 11 | GRPHWLK_COLFWD_STR, 12 | GRPHWLK_POPFRK_COLFWD_STR, 13 | GRPHWLK_NOCOVG_STR, 14 | GRPHWLK_NOCOLCOVG_STR, 15 | GRPHWLK_NOLINKS_STR, 16 | GRPHWLK_SPLIT_LINKS_STR, 17 | GRPHWLK_MISSING_LINKS_STR, 18 | GRPHWLK_USELINKS_STR}; 19 | 20 | char* graph_step_status2str(enum GraphStepStatus status, char *str, size_t len) 21 | { 22 | ctx_assert(len >= 20); (void)len; 23 | ctx_assert(status < GRPHWLK_NUM_STATES); 24 | strcpy(str, graph_step_str[status]); 25 | return str; 26 | } 27 | 28 | void graph_step_print_state_hist(const size_t hist[GRPHWLK_NUM_STATES]) 29 | { 30 | util_print_nums(graph_step_str, hist, GRPHWLK_NUM_STATES, 30); 31 | } 32 | -------------------------------------------------------------------------------- /tests/bubbles/bubbles2/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # 4 | # Test we call all bubbles when filtering ref bubbles 5 | # 6 | 7 | 8 | CTXDIR=../../.. 
9 | MCCORTEX31=$(CTXDIR)/bin/mccortex31 10 | VCFNALLELES=$(CTXDIR)/libs/biogrok/vcf-count-alleles 11 | CTXFLANKS=$(CTXDIR)/scripts/cortex_print_flanks.sh 12 | VCFSORT=$(CTXDIR)/libs/biogrok/vcf-sort 13 | BWA=$(CTXDIR)/libs/bwa/bwa 14 | 15 | K=31 16 | 17 | SAMPLES=itchy scratchy ref 18 | FASTAS=$(SAMPLES:=.fa) 19 | GRAPHS=$(SAMPLES:=.k$(K).ctx) 20 | 21 | all: bubbles.txt test 22 | 23 | itchy.fa: scratchy.fa 24 | scratchy.fa: ref.fa 25 | ref.fa: 26 | python make-exhaustive-alleles.py itchy.fa scratchy.fa ref.fa 27 | 28 | %.k$(K).ctx: %.fa 29 | $(MCCORTEX31) build -k $(K) --sample "$*" --seq $< $@ >& $@.log 30 | 31 | bubbles.txt.log: bubbles.txt 32 | bubbles.txt: $(GRAPHS) 33 | $(MCCORTEX31) bubbles -o $@.gz --haploid 2 $(GRAPHS) >& $@.log 34 | gzip -fd $@.gz 35 | 36 | # expect 60 bubbles called = (2**4-1)*2*2 37 | # expect 64 haploid bubbles = (2**4)*2*2 (i.e. bubbles in ref ignored) 38 | test: bubbles.txt.log 39 | grep -q 'Bubble Caller called 60 bubbles' $< 40 | grep -q 'Haploid bubbles dropped: 64' $< 41 | grep -q 'Serial bubbles dropped: 0' $< 42 | @echo "=> all looks good." 
43 | 44 | clean: 45 | rm -rf $(GRAPHS) bubbles.txt *.log 46 | 47 | .PHONY: all clean test 48 | -------------------------------------------------------------------------------- /src/global/ctx_assert.c: -------------------------------------------------------------------------------- 1 | #include "global.h" 2 | #include "ctx_assert.h" 3 | 4 | // 5 | // Checks and asserts 6 | // 7 | 8 | static void ctx_assertf2(const char *file, const char *func, int line, 9 | const char *asserttxt, const char *fmt, va_list argptr) 10 | { 11 | pthread_mutex_lock(&ctx_biglock); 12 | fflush(stdout); 13 | fprintf(stderr, "[%s:%i] Assert Failed %s(): %s", file, line, func, asserttxt); 14 | 15 | if(fmt != NULL) { 16 | fputs(": ", stderr); 17 | vfprintf(stderr, fmt, argptr); 18 | } 19 | 20 | // Print a timestamp so we know when the crash occurred 21 | fprintf(stderr, "\n"); 22 | timestampf(stderr); 23 | fputs(" Assert Error\n", stderr); 24 | fflush(stderr); 25 | pthread_mutex_unlock(&ctx_biglock); 26 | } 27 | 28 | void ctx_assertf_no_abort(const char *file, const char *func, int line, 29 | const char *asserttxt, const char *fmt, ...) 30 | { 31 | va_list argptr; 32 | va_start(argptr, fmt); 33 | ctx_assertf2(file, func, line, asserttxt, fmt, argptr); 34 | va_end(argptr); 35 | } 36 | 37 | void ctx_assertf(const char *file, const char *func, int line, 38 | const char *asserttxt, const char *fmt, ...) 
39 | { 40 | va_list argptr; 41 | va_start(argptr, fmt); 42 | ctx_assertf2(file, func, line, asserttxt, fmt, argptr); 43 | va_end(argptr); 44 | abort(); 45 | } 46 | -------------------------------------------------------------------------------- /scripts/analysis/mapping-vars-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | 5 | if [ $# -ne 4 ]; then 6 | echo "Usage: $0 " 1>&2 7 | echo " writes: OUT.fa, OUT.sam, OUT.stats.txt, OUT.sites.txt, OUT.sites.vcf.gz" 1>&2 8 | echo " sites that map + pass are in: OUT.vcf.gz" 1>&2 9 | exit -1 10 | fi 11 | set -o xtrace 12 | 13 | CTXDIR=$( cd $( dirname ${BASH_SOURCE[0]} ) && cd ../.. && pwd ) 14 | BWA=$CTXDIR/libs/bwa/bwa 15 | BGZIP=$CTXDIR/libs/htslib/bgzip 16 | BCFTOOLS=$CTXDIR/libs/bcftools/bcftools 17 | VCFCONTIGS=$CTXDIR/libs/vcf-slim/bin/vcfcontigs 18 | SAM2VCF=$CTXDIR/libs/vcf-slim/scripts/sam-name-to-vcf.sh 19 | VCFRENAME=$CTXDIR/libs/biogrok/vcf-rename 20 | VCF_SELECT_ID=$CTXDIR/libs/biogrok/vcf-select-id 21 | SAMCMP=$CTXDIR/scripts/analysis/haploid-sam-compare.py 22 | 23 | 24 | INVCF=$1 25 | REF=$2 26 | TRUTHFA=$3 27 | PREFIX=$4 28 | 29 | OUTFASTA=$PREFIX.fa 30 | OUTSAM=$PREFIX.sam 31 | OUTSTATS=$PREFIX.stats.txt 32 | OUTSITES=$PREFIX.sites.txt 33 | RENAMEDVCF=$PREFIX.renamed.vcf.gz 34 | OUTVCF=$PREFIX.vcf.gz 35 | 36 | mkdir -p $(dirname $OUTFASTA) 37 | 38 | $VCFRENAME $INVCF > $RENAMEDVCF 39 | $VCFCONTIGS --trim --no-ref 50 $REF $RENAMEDVCF > $OUTFASTA 40 | $BWA mem $TRUTHFA $OUTFASTA > $OUTSAM 41 | $SAMCMP --print-valid $OUTSITES $OUTSAM > $OUTSTATS 42 | $VCF_SELECT_ID <(cut -d: -f3 $OUTSITES) $RENAMEDVCF | $BGZIP -c > $OUTVCF 43 | -------------------------------------------------------------------------------- /src/basic/range.h: -------------------------------------------------------------------------------- 1 | #ifndef RANGE_H_ 2 | #define RANGE_H_ 3 | 4 | /* 5 | * Valid ranges are: 6 | * * 7 | * 1 8 | * 3 9 | * 2-4 10 | * 
1,1-3,2 11 | */ 12 | 13 | /** 14 | * Parse range string and return number of items 15 | * 16 | * @return number of items in range, or -1 if there is a syntax error 17 | */ 18 | int range_get_num(const char *str, size_t range_max); 19 | 20 | /** 21 | * Parse range string into array arr 22 | * 23 | * @param str nul terminated string to parse 24 | * @param arr place parsed array here 25 | * @param range_max max value permitted in the array 26 | * @return 0 on success, -1 on error 27 | */ 28 | int range_parse_array(const char *str, size_t *arr, size_t range_max); 29 | 30 | /** 31 | * Parse range into array arr, filling array to ensure exactly a given number 32 | * of entries. If empty, array is filled 0..num_entries-1, if only one entry, 33 | * array is filled with same entry num_entries times 34 | * 35 | * @param str nul terminated string to parse 36 | * @param arr place parsed array here 37 | * @param range_max max value permitted in the array 38 | * @param num_entries Force exactly `num_entries` to be placed in `arr` 39 | * @return 0 on success, -1 on error 40 | */ 41 | int range_parse_array_fill(const char *str, size_t *arr, 42 | size_t range_max, size_t num_entries); 43 | 44 | #endif /* RANGE_H_ */ 45 | -------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/large_events/large-events-plot.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript --vanilla 2 | 3 | # 4 | # Dot plot of ref allele length vs sample allele length 5 | # 6 | 7 | args <- commandArgs(trailingOnly=TRUE) 8 | if(length(args) != 1) { 9 | stop("Usage: Rscript --vanilla large-events-plot.R \n") 10 | } 11 | 12 | file <- args[1] 13 | #file <- 'bubbles50K/stats.txt' 14 | 15 | title <- expression(paste(italic('klebsiella pneumoniae'),' large event sizes')) 16 | xlabel <- 'Reference length (kbp)' 17 | ylabel <- 'Sample length (kbp)' 18 | 19 | r <- 
read.table(file,sep='\t',head=F,comment.char='#') 20 | r <- r / 1000 21 | 22 | # Get maximum and round to nearest 5 23 | lim <- max(r[,1],r[,2]) 24 | lim <- floor((ceiling(lim)+4)/5)*5 25 | 26 | pdf(file='kleb_large_events_R_log.pdf', width=6, height=6) 27 | plot(r, xlab=xlabel, ylab=ylabel, log="xy", 28 | main=title, xlim=c(0,lim), ylim=c(0,lim)) 29 | dev.off() 30 | 31 | # With ggplot 32 | library('ggplot2') 33 | library('reshape') 34 | library('scales') 35 | library('plyr') 36 | 37 | df <- data.frame(ref=r[,1], sample=r[,2]) 38 | 39 | p <- ggplot(df, aes(x=ref, y=sample)) + 40 | geom_point(shape=1) + 41 | scale_x_log10() + scale_y_log10() + 42 | xlim(0,lim) + ylim(0,lim) + 43 | ggtitle(title) + xlab(xlabel) + ylab(ylabel) 44 | 45 | ggsave(p, file='kleb_large_events_ggplot_log.pdf', width=6, height=6) 46 | -------------------------------------------------------------------------------- /tests/path_check/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | CTXDIR=../.. 
4 | MCCORTEX=$(CTXDIR)/bin/mccortex31 5 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 6 | K=7 7 | KEEP=$(shell echo genome.fa genome.k$(K).{ctx,ctp} reads.1.fa.gz reads.2.fa.gz reads.{se,pe}.k$(K).ctp) 8 | PLOTS=genome.k$(K).dot genome.k$(K).pdf 9 | 10 | all: $(KEEP) 11 | 12 | plots: $(PLOTS) 13 | 14 | clean: 15 | rm -rf $(KEEP) $(PLOTS) 16 | 17 | # Sample random genome 18 | genome.fa: 19 | $(DNACAT) -F -n 200 > genome.fa 20 | 21 | genome.k$(K).ctx: genome.fa 22 | $(MCCORTEX) build -q -m 10M -k $(K) --sample MssrGenome --seq $< $@ 23 | 24 | genome.k$(K).dot: genome.k$(K).ctx 25 | $(MCCORTEX) unitigs -q --dot $< > $@ 26 | 27 | genome.k$(K).pdf: genome.k$(K).dot 28 | dot -Tpdf $< > $@ 29 | 30 | reads.1.fa.gz reads.2.fa.gz: genome.fa 31 | $(CTXDIR)/libs/readsim/readsim -r genome.fa -l 10 -i 20 -v 0.1 -d 2 reads 32 | 33 | genome.k$(K).ctp: genome.k$(K).ctx 34 | $(MCCORTEX) thread -q -m 10M -t 1 --seq genome.fa -o $@ $< 35 | 36 | reads.se.k$(K).ctp: genome.k$(K).ctx reads.1.fa.gz reads.2.fa.gz 37 | $(MCCORTEX) thread -q -m 10M -t 1 --seq reads.1.fa.gz --seq reads.2.fa.gz -o reads.se.k$(K).ctp genome.k$(K).ctx 38 | 39 | reads.pe.k$(K).ctp: genome.k$(K).ctx reads.1.fa.gz reads.2.fa.gz 40 | $(MCCORTEX) thread -q -m 10M -t 1 --seq2 reads.1.fa.gz:reads.2.fa.gz -o reads.pe.k$(K).ctp genome.k$(K).ctx 41 | 42 | .PHONY: all plots clean 43 | -------------------------------------------------------------------------------- /results/data/chr22/uniq_flanks/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | # 3 | # Isaac Turner 4 | # 2014-09-05 5 | # 6 | 7 | CTXDIR=../../../..
8 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 9 | BWA=bwa 10 | SAMTOOLS=samtools 11 | 12 | REF=../chr22_17M_18M.fa 13 | 14 | K0=GGTCGCACACAAATACTACGGGCATTGGATGCGACCATAAGTCTTGACAGGCTTTGTTCCCT 15 | K1=ACAACTTAACCTGGAACTAGAACTAATTTATGAGCGAGCCAGAACAGGTAGTCTGAGGGAGT 16 | 17 | K0s=$(shell echo $(K0) | awk '{print substr($$0,1,11)}') 18 | K1s=$(shell echo $(K1) | awk '{print substr($$0,length($$0)-10,11)}') 19 | K0r=$(shell echo $(K0s) | $(DNACAT) -P -r -) 20 | K1r=$(shell echo $(K1s) | $(DNACAT) -P -r -) 21 | 22 | all: chr22.1Mbp.uniq.fa chr22.1Mbp.uniq.fa.bwt chr22.1Mbp.uniq.fa.fai check 23 | 24 | chr22.1Mbp.uniq.fa: 25 | (echo '>chr22_17M_18M.11bp.and.up.uniq.flanks'; \ 26 | echo $(K0); $(DNACAT) -P $(REF); echo $(K1);) | \ 27 | $(DNACAT) -F -w 80 - > $@ 28 | 29 | chr22.1Mbp.uniq.fa.bwt: chr22.1Mbp.uniq.fa 30 | $(BWA) index $< 31 | 32 | chr22.1Mbp.uniq.fa.fai: chr22.1Mbp.uniq.fa 33 | $(SAMTOOLS) faidx $< 34 | 35 | clean: 36 | rm -rf chr22.1Mbp.uniq.fa* 37 | 38 | check: chr22.1Mbp.uniq.fa 39 | @echo; echo Command should only print two kmers: 40 | $(DNACAT) -P chr22.1Mbp.uniq.fa | grep -ioE '($(K0s)|$(K0r)|$(K1s)|$(K1r))' - 41 | @echo; echo Check file contains exactly 1Mbp+2*62: 42 | $(DNACAT) -s chr22.1Mbp.uniq.fa 43 | 44 | .PHONY: all clean check 45 | -------------------------------------------------------------------------------- /tests/unitigs/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | # 4 | # Test unitigs command by generating 200 random DNA bases, building cortex graph 5 | # then generating untigs with various output options 6 | # 7 | 8 | K=7 9 | CTXDIR=../.. 
10 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 11 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl 12 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 13 | 14 | FILES=genome.fa genome.k$(K).ctx 15 | UNITIGS=genome.k$(K).unitigs.fa genome.k$(K).unitigs.dot genome.k$(K).unitigs.gfa 16 | PLOTS=genome.k$(K).unitigs.dot genome.k$(K).kmers.dot 17 | PDFS=$(PLOTS:.dot=.pdf) 18 | 19 | TGTS=$(FILES) $(UNITIGS) $(PLOTS) 20 | 21 | all: $(TGTS) 22 | 23 | clean: 24 | rm -rf $(TGTS) $(PDFS) 25 | 26 | # Sample random genome 27 | genome.fa: 28 | $(DNACAT) -F -n 200 > genome.fa 29 | 30 | genome.k$(K).ctx: genome.fa 31 | $(MCCORTEX) build -q -m 1M -k $(K) --sample MssrGenome --seq $< $@ 32 | 33 | genome.k$(K).unitigs.fa: genome.k$(K).ctx 34 | $(MCCORTEX) unitigs -q -m 1M -o $@ $< 35 | 36 | genome.k$(K).unitigs.dot: genome.k$(K).ctx 37 | $(MCCORTEX) unitigs -q -m 1M --dot --points $< > $@ 38 | 39 | genome.k$(K).unitigs.gfa: genome.k$(K).ctx 40 | $(MCCORTEX) unitigs -q -m 1M --gfa $< > $@ 41 | 42 | genome.k$(K).kmers.dot: genome.k$(K).ctx 43 | $(CTX2DOT) $< > $@ 44 | 45 | %.pdf: %.dot 46 | dot -Tpdf $< > $@ 47 | 48 | plots: $(PDFS) 49 | 50 | .PHONY: all clean plots 51 | -------------------------------------------------------------------------------- /src/basic/chrom_pos_list.h: -------------------------------------------------------------------------------- 1 | #ifndef CHROM_POS_LIST_H_ 2 | #define CHROM_POS_LIST_H_ 3 | 4 | // ChromPosOffset coords are read/printed 1-based, stored 0-based 5 | typedef struct 6 | { 7 | char *chrom; 8 | size_t start, end, offset; // 0-based; start < end; end not inclusive 9 | bool fw_strand; 10 | } ChromPosOffset; 11 | 12 | #include "madcrowlib/madcrow_buffer.h" 13 | madcrow_buffer(chrompos_buf, ChromPosBuffer, ChromPosOffset); 14 | 15 | // Sort by length, chrom, strand (fw,rv), start 16 | int chrom_pos_cmp_len(const void *aa, const void *bb); 17 | 18 | // Validate a chrom position object 19 | void chrom_pos_validate(const ChromPosOffset *pos); 20 | 
#define chrom_pos_len(pos) ((pos)->end - (pos)->start) 21 | 22 | /** 23 | * Get largest match 24 | * @param buf List of chromosome positions to search 25 | * @param pos Copy largest to here 26 | * @param use_first If more than one largest, return first, otherwise return last 27 | * @return Number of largest 28 | */ 29 | size_t chrom_pos_list_get_largest(const ChromPosBuffer *buf, bool use_first, 30 | ChromPosOffset *pos); 31 | 32 | // Parse a string in the form: chr:start-end:strand:offset[,...] 33 | // Return 0 on success, -1 on error 34 | int chrom_pos_list_parse(char *str, ChromPosBuffer *buf); 35 | 36 | void chrom_pos_list_sort(ChromPosBuffer *buf); 37 | 38 | #endif /* CHROM_POS_LIST_H_ */ 39 | -------------------------------------------------------------------------------- /tests/pop_bubbles/pop_bubbles2/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # Test pop bubbles with multiple samples, multiple input files 4 | 5 | K=21 6 | CTXDIR=../../..
7 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1]) 8 | 9 | SEQS=sample1.fa sample2.fa clean2.fa 10 | GRAPHS=sample1.ctx sample2.ctx popped.ctx truth.ctx 11 | 12 | all: popped.ctx truth.ctx check 13 | 14 | sample1.fa: 15 | ( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \ 16 | echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@ 17 | 18 | sample2.fa: 19 | ( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAcATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@ 20 | 21 | clean2.fa: 22 | ( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGA; \ 23 | echo ATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@ 24 | 25 | truth.ctx: sample1.ctx sample2.ctx clean2.fa 26 | $(MCCORTEX) build -q -k $(K) -s clean2 -1 clean2.fa - | \ 27 | $(MCCORTEX) join -q -o $@ 0:sample1.ctx 1:- 28 | 29 | %.ctx: %.fa 30 | $(MCCORTEX) build -q -k $(K) --sample $* --seq $< $@ 31 | 32 | popped.ctx: sample1.ctx sample2.ctx 33 | $(MCCORTEX) popbubbles -q --out $@ $^ 34 | 35 | check: popped.ctx truth.ctx 36 | diff -q <($(MCCORTEX) view -q -k popped.ctx | sort) <($(MCCORTEX) view -q -k truth.ctx | sort) && \ 37 | echo "Kmers match." 38 | 39 | 40 | clean: 41 | rm -rf $(SEQS) $(GRAPHS) 42 | 43 | .PHONY: all clean check 44 | -------------------------------------------------------------------------------- /tests/vcfcov/calls5/truth.cov.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##fileDate=20151014 4 | ##reference=ref/ref.fa 5 | ##contig= 6 | ##FORMAT= 7 | ##SAMPLE= 8 | ##FORMAT= 9 | ##FORMAT= 10 | ##mccortex_9e268b2= 11 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT wally 12 | ref 7 . G T . PASS . K11R:K11A 4:0 13 | ref 15 . GCAATACCCCCCGATGACGG G . PASS . K11R:K11A 2:2 14 | ref 20 . A T . PASS . K11R:K11A 2:0 15 | ref 22 . C G . PASS . K11R:K11A 2:0 16 | ref 40 . T A . PASS . K11R:K11A 2:2 17 | ref 49 . GG CGAT . PASS . K11R:K11A 4:0 18 | ref 71 . A GAT . PASS .
K11R:K11A 4:0 19 | ref 92 . GCACCAGGG AC . PASS . K11R:K11A 4:0 20 | ref 121 . C G . PASS . K11R:K11A 4:0 21 | ref 142 . GGCACAGCA TTACTCTTC . PASS . K11R:K11A 4:0 22 | ref 171 . A CTAG . PASS . K11R:K11A 4:0 23 | ref 192 . CATCATAG A . PASS . K11R:K11A 4:0 24 | -------------------------------------------------------------------------------- /src/alignment/correct_aln_input.h: -------------------------------------------------------------------------------- 1 | #ifndef CORRECT_ALN_INPUT_H_ 2 | #define CORRECT_ALN_INPUT_H_ 3 | 4 | #include "seqout.h" 5 | #include "cortex_types.h" 6 | #include "correct_alignment.h" 7 | #include "async_read_io.h" 8 | 9 | #include "cJSON/cJSON.h" 10 | 11 | typedef struct 12 | { 13 | AsyncIOInput files; 14 | uint8_t fq_cutoff, hp_cutoff; 15 | ReadMateDir matedir; 16 | CorrectAlnParam crt_params; 17 | // Next two only set if outputting sequences per file, as in ctx_correct.c 18 | char *out_base; 19 | SeqOutput *output; 20 | } CorrectAlnInput; 21 | 22 | #define CORRECT_ALN_INPUT_INIT {.fq_cutoff = 0, .hp_cutoff = 0, \ 23 | .matedir = READPAIR_FR, \ 24 | .crt_params = CORRECT_PARAMS_DEFAULT, \ 25 | .out_base = NULL, .output = NULL} 26 | 27 | #include "madcrowlib/madcrow_buffer.h" 28 | madcrow_buffer(correct_aln_input_buf, CorrectAlnInputBuffer, CorrectAlnInput); 29 | 30 | cJSON* correct_aln_input_json_hdr(const CorrectAlnInput *input); 31 | 32 | void correct_aln_input_print(const CorrectAlnInput *c); 33 | 34 | // Copy CorrectAlnInput to an array of AsyncIOInputs 35 | void correct_aln_input_to_asycio(AsyncIOInput *asyncio_tasks, 36 | CorrectAlnInput *inputs, 37 | size_t num_inputs); 38 | 39 | #endif /* CORRECT_ALN_INPUT_H_ */ 40 | -------------------------------------------------------------------------------- /scripts/R/plot-link-dist-cov.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript --vanilla 2 | 3 | # Plot coverage matrix generated by e.g. 
'mccortex31 links --covg-hist out.csv ...' 4 | # 5 | args <- commandArgs(trailingOnly=TRUE) 6 | if(length(args) < 2 || length(args) > 5) { 7 | stop("Usage: ./plot-link-dist-cov.R [dist [cutoff [maxcov]]]\n") 8 | } 9 | 10 | dist=1 11 | cutoff=0 12 | maxcov=0 13 | 14 | input_csv <- args[1] 15 | output_pdf <- args[2] 16 | 17 | if(length(args) >= 3) { dist <- as.numeric(args[3]) } 18 | if(length(args) >= 4) { cutoff <- as.numeric(args[4]) } 19 | if(length(args) >= 5) { maxcov <- as.numeric(args[5]) } 20 | 21 | library('ggplot2') 22 | library('reshape') 23 | library('scales') 24 | library('plyr') 25 | 26 | cat("input_csv='",input_csv,"'\n",sep='') 27 | cat("output_pdf='",output_pdf,"'\n",sep='') 28 | cat('dist=',dist,'\n',sep='') 29 | cat('maxcov=',maxcov,'\n',sep='') 30 | 31 | r <- read.table(input_csv,sep=',',head=T,row.names=1,comment.char='#',as.is=T) 32 | 33 | if(maxcov == 0) { maxcov=ncol(r) } 34 | maxcov<-min(ncol(r), maxcov) 35 | r <- r[,1:maxcov] 36 | 37 | v<-as.numeric(r[dist,]) 38 | d<-data.frame(x=1:maxcov, y=v) 39 | 40 | p <- ggplot(d, aes(x=x, y=y)) + geom_line() + 41 | xlab("Link coverage") + 42 | ylab(paste("Number of links of length",(dist+1),"(kmers)")) + 43 | geom_vline(xintercept=cutoff, color="red") 44 | 45 | 46 | ggsave(p, file=output_pdf, width=6, height=6) 47 | -------------------------------------------------------------------------------- /results/kmer_size_experiment/results/plot-link-counts.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript --vanilla 2 | 3 | # Isaac Turner 2017-02-16 4 | 5 | args <- commandArgs(trailingOnly=TRUE) 6 | if(length(args) != 2) { 7 | stop("Usage: ./plot-link-counts.R \n") 8 | } 9 | 10 | plot_path <- "latest/perfect.linkcounts.se.pdf" 11 | csv_path <- "latest/perfect.linkcounts.se.csv" 12 | 13 | plot_path = args[1] 14 | csv_path = args[2] 15 | 16 | a <- read.table(csv_path, sep='\t',head=T,comment.char='#',as.is=T) 17 | 18 | # Plotting parameters 19 | cols 
<- c('#1b9e77', '#d95f02', '#7570b3', 'red') # from color brewer 20 | pnts <- c(19,4,17,1) # point styles pch= 21 | jf <- 0.2 # jitter factor 22 | lt <- 2.5 # line thickness 23 | # 24 | 25 | # * joins with no spaces, ~ joins with a space 26 | xlabel = expression(italic('k')) 27 | ylabel = expression('no. of '*italic('k')*'mers with links (log)') 28 | 29 | # pdf(plot_path, width=6, height=6) 30 | quartz(type='pdf',file=plot_path,width=6,height=5) 31 | 32 | # Remove empty title space 33 | par(mar=c(4,5,2,2)+0.1) # set margins: bottom, left, top and right 34 | par(xpd=TRUE) 35 | 36 | par(mgp=c(4, 1, 0)) # axis label positions 37 | 38 | plot(a$K, a$n_link_kmers, type='b', axes=F, log='y', 39 | xlab='', ylab='', ylim=c(1,max(a$n_link_kmers))) 40 | 41 | mtext(side=1, text=xlabel, line=2) 42 | mtext(side=2, text=ylabel, line=4) 43 | axis(1, at=a$K) 44 | axis(2, las=2) 45 | 46 | dev.off() 47 | -------------------------------------------------------------------------------- /tests/breakpoint/breakpoint2/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Check that we don't call a ref bubble 3 | # 4 | 5 | SHELL:=/bin/bash -euo pipefail 6 | 7 | CTXDIR=../../.. 
8 | CTXPIPELINE=$(CTXDIR)/scripts/make-pipeline.pl 9 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 10 | READSIM=$(CTXDIR)/libs/readsim/readsim 11 | VCFCOUNT=$(CTXDIR)/libs/biogrok/vcf-count 12 | 13 | REFLEN=1000 14 | K=31 15 | SEQDEPTH=30 16 | READLEN=50 17 | OUTDIR=proj 18 | 19 | all: run 20 | 21 | ref.fa: 22 | echo '>ref' > $@ 23 | echo -n TCTCATATGGGCATTGTCGTCTGCCCGTCACCTTCGGTCGACGCTGTTCAACATTCGGTGTTGTAGTTTATTATACTAGCGCAATCCCCGAGTTTGGGCA >> $@ 24 | echo TCTCATATGGGCATTGTCGTCTGCCCGTCACCTTCGGTCGAgGCTGTTCAACATTCGGTGTTGTAGTTTATTATACTAGCGCAATCCCCGAGTTTGGGCA >> $@ 25 | 26 | reads/reads.fa.gz: ref.fa 27 | mkdir -p reads 28 | $(READSIM) -r ref.fa -l $(READLEN) -s -d $(SEQDEPTH) reads/reads 29 | 30 | task.k$(K).mk: 31 | echo "RefReads reads/reads.fa.gz" | $(CTXPIPELINE) -r ref.fa $(K) proj - > $@ 32 | 33 | run: task.k$(K).mk reads/reads.fa.gz ref.fa 34 | $(MAKE) -f $< CTXDIR=$(CTXDIR) breakpoints-vcf 35 | @# Check no VCF entries 36 | (( `$(VCFCOUNT) proj/vcfs/breakpoints.joint.links.k$(K).vcf.gz` == 0 )) || false 37 | @# Check no breakpoint call entries 38 | (( `grep -c '>brkpnt' proj/k$(K)/breakpoints_links/joint.brk.gz` == 0 )) || false 39 | @echo 'Success: no breakpoint calls or VCF entries!' 
40 | 41 | clean: 42 | rm -rf ref.fa* reads proj task.k$(K).mk 43 | 44 | .PHONY: all run clean 45 | -------------------------------------------------------------------------------- /src/graph/contig_confidence.h: -------------------------------------------------------------------------------- 1 | #ifndef CONTIG_CONFIDENCE_H_ 2 | #define CONTIG_CONFIDENCE_H_ 3 | 4 | #include "madcrowlib/madcrow_buffer.h" 5 | madcrow_buffer(double_buf,DoubleBuffer,double); 6 | 7 | typedef struct { 8 | DoubleBuffer table; 9 | size_t ncols; 10 | } ContigConfidenceTable; 11 | 12 | // Call conf_table_dealloc to release memory after calling this function 13 | // void conf_table_load_csv(ContigConfidenceTable *conf_table, 14 | // FILE *fh, const char *path); 15 | 16 | // Call conf_table_dealloc to release memory after calling this function 17 | void conf_table_update_hist(ContigConfidenceTable *table, 18 | size_t col, size_t genome_size, 19 | size_t *contig_hist, size_t hist_len); 20 | 21 | // Call conf_table_dealloc to release memory after calling this function 22 | void conf_table_calc(ContigConfidenceTable *table, size_t col, 23 | size_t max_read_len, double avg_bp_covg); 24 | 25 | void conf_table_alloc(ContigConfidenceTable *table, size_t ncols); 26 | void conf_table_dealloc(ContigConfidenceTable *table); 27 | 28 | double conf_table_lookup(const ContigConfidenceTable *table, 29 | size_t col, size_t dist); 30 | 31 | void conf_table_print(const ContigConfidenceTable *table, FILE *fh); 32 | 33 | void conf_table_save(const ContigConfidenceTable *table, const char *path); 34 | 35 | #endif /* CONTIG_CONFIDENCE_H_ */ 36 | -------------------------------------------------------------------------------- /src/basic/graph_info.h: -------------------------------------------------------------------------------- 1 | #ifndef DB_INFO_H_ 2 | #define DB_INFO_H_ 3 | 4 | #include 5 | #include "string_buffer/string_buffer.h" 6 | #include "cortex_types.h" 7 | #include "seq_loading_stats.h" 8 | 9 | // 
Thresholds are zero if not used (e.g. cleaned_unitigs == false) 10 | // is_graph_intersection is for cleaning a low covg sample against 11 | // cleaned pool of population 12 | typedef struct 13 | { 14 | bool cleaned_tips, cleaned_unitigs, cleaned_kmers; 15 | Covg clean_unitigs_thresh, clean_kmers_thresh; 16 | bool is_graph_intersection; 17 | StrBuf intersection_name; 18 | } ErrorCleaning; 19 | 20 | typedef struct 21 | { 22 | uint32_t mean_read_length; // after trim = (total_seq / number of contigs) 23 | uint64_t total_sequence; 24 | StrBuf sample_name; 25 | long double seq_err; 26 | ErrorCleaning cleaning; 27 | } GraphInfo; 28 | 29 | void graph_info_init(GraphInfo *ginfo); 30 | void graph_info_alloc(GraphInfo *ginfo); 31 | void graph_info_dealloc(GraphInfo *ginfo); 32 | 33 | void graph_info_make_intersect(const GraphInfo *ginfo, StrBuf *intersect_name); 34 | void graph_info_append_intersect(ErrorCleaning *cleaning, 35 | const char *intersect_name); 36 | 37 | void graph_info_cpy(GraphInfo *dst, const GraphInfo *src); 38 | void graph_info_merge(GraphInfo *dst, const GraphInfo *src); 39 | 40 | void graph_info_update_stats(GraphInfo *ginfo, const SeqLoadingStats *stats); 41 | 42 | #endif /* GRAPH_INFO_H_ */ 43 | -------------------------------------------------------------------------------- /tests/threading/threading3/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | K=9 4 | CTXDIR=../../..
5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl 7 | 8 | GRAPHS=genome.k$(K).ctx 9 | LINKS=reads.pe.one.ctp reads.pe.two.ctp 10 | LOGS=$(addsuffix .log,$(GRAPHS) $(LINKS)) 11 | TGTS=genome.fa read.1.fa read.2.fa $(GRAPHS) $(LINKS) 12 | # non-default target: genome.k$(K).pdf (built by `make plots`) 13 | 14 | all: $(TGTS) 15 | 16 | clean: 17 | rm -rf $(TGTS) $(LOGS) gap_sizes.*.csv mp_sizes.*.csv genome.k$(K).pdf 18 | 19 | plots: genome.k$(K).pdf 20 | 21 | genome.fa: 22 | echo gCATCAGTGGCCttggcgactcgc > genome.fa 23 | echo TCATCAGTGGCCATGACGCTAACT >> genome.fa 24 | 25 | read.1.fa: 26 | echo TCATCAGTGG > read.1.fa 27 | 28 | read.2.fa: 29 | # echo ACGCTAACT > read.2.fa # Actually revcmp read2 30 | echo AGTTAGCGT > read.2.fa 31 | 32 | genome.k$(K).ctx: genome.fa 33 | $(MCCORTEX) build -m 1M -k $(K) --sample Genome --seq genome.fa genome.k$(K).ctx >& $@.log 34 | 35 | reads.pe.one.ctp: genome.k$(K).ctx read.1.fa read.2.fa 36 | $(MCCORTEX) thread -m 1M --print-contigs --one-way --seq2 read.1.fa:read.2.fa -o $@ genome.k$(K).ctx >& $@.log 37 | 38 | reads.pe.two.ctp: genome.k$(K).ctx read.1.fa read.2.fa 39 | $(MCCORTEX) thread -m 1M --print-contigs --two-way --seq2 read.1.fa:read.2.fa -o $@ genome.k$(K).ctx >& $@.log 40 | 41 | genome.k$(K).pdf: genome.k$(K).ctx 42 | $(CTX2DOT) genome.k$(K).ctx | dot -Tpdf > genome.k$(K).pdf 43 | 44 | .PHONY: all clean plots 45 | -------------------------------------------------------------------------------- /tests/vcfcov/calls1/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # 4 | # Test vcfcov with three groups of overlapping SNPs at positions ref:1,50,199 5 | # and chr1:30. Length of chromosome is ref=200, chr1=100. 6 | # We also test that we don't crash if we encounter a contig that was not defined 7 | # in the header. 8 | # 9 | 10 | K=21 11 | CTXDIR=../../.. 
12 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 13 | VCFENTRIES=$(CTXDIR)/libs/biogrok/vcf-entries 14 | 15 | REF=../ref/ref.fa 16 | 17 | all: check 18 | 19 | clean: 20 | rm -rf calls.cov.vcf lowmem.cov.vcf graph.k$(K).ctx *.log 21 | 22 | calls.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx 23 | $(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --high-mem calls.vcf graph.k$(K).ctx >& $@.log 24 | 25 | lowmem.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx 26 | $(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --low-mem calls.vcf graph.k$(K).ctx >& $@.log 27 | 28 | graph.k$(K).ctx: john.fa jane.fa 29 | $(MCCORTEX) build -m 10M -k $(K) \ 30 | --sample John --seq john.fa \ 31 | --sample Jane --seq jane.fa \ 32 | --sample Empty --seq <(echo '') \ 33 | $@ >& $@.log 34 | 35 | check: calls.cov.vcf lowmem.cov.vcf truth.cov.vcf 36 | diff -q <($(VCFENTRIES) calls.cov.vcf) <($(VCFENTRIES) truth.cov.vcf) 37 | diff -q <($(VCFENTRIES) lowmem.cov.vcf) <($(VCFENTRIES) truth.cov.vcf) 38 | @echo "=> VCF files match." 39 | 40 | view: calls.cov.vcf truth.cov.vcf 41 | gzip -fcd calls.cov.vcf 42 | gzip -fcd truth.cov.vcf 43 | 44 | .PHONY: all clean view check 45 | -------------------------------------------------------------------------------- /results/var_calling_10ecoli/results/20150615_joint_1by1_links_plain/20150617.wed.stats.txt: -------------------------------------------------------------------------------- 1 | Missed: 2415 / 45789 ( 5.27%) 2 | FP: 384 / 43758 ( 0.88%) 3 | Found: 43374 / 45789 (94.73%) 4 | remember to delete temp dir: isec_1by1_cortex 5 | Missed: 2232 / 45789 ( 4.87%) 6 | FP: 391 / 43948 ( 0.89%) 7 | Found: 43557 / 45789 (95.13%) 8 | remember to delete temp dir: isec_brk_1by1_plain 9 | Missed: 1977 / 45789 ( 4.32%) 10 | FP: 392 / 44204 ( 0.89%) 11 | Found: 43812 / 45789 (95.68%) 12 | remember to delete temp dir: isec_brk_1by1_links 13 | Missed: 2074 / 45789 ( 4.53%) 14 | FP: 233 / 43948 ( 0.53%) 15 | Found: 43715 / 45789 (95.47%) 16 | remember to delete temp dir: isec_brk_joint_plain 17 | Missed: 
2009 / 45789 ( 4.39%) 18 | FP: 233 / 44013 ( 0.53%) 19 | Found: 43780 / 45789 (95.61%) 20 | remember to delete temp dir: isec_brk_joint_links 21 | Missed: 1716 / 45789 ( 3.75%) 22 | FP: 3327 / 47400 ( 7.02%) 23 | Found: 44073 / 45789 (96.25%) 24 | remember to delete temp dir: isec_bub_1by1_plain 25 | Missed: 1672 / 45789 ( 3.65%) 26 | FP: 4068 / 48185 ( 8.44%) 27 | Found: 44117 / 45789 (96.35%) 28 | remember to delete temp dir: isec_bub_1by1_links 29 | Missed: 3746 / 45789 ( 8.18%) 30 | FP: 3180 / 45223 ( 7.03%) 31 | Found: 42043 / 45789 (91.82%) 32 | remember to delete temp dir: isec_bub_joint_plain 33 | Missed: 3688 / 45789 ( 8.05%) 34 | FP: 3363 / 45464 ( 7.40%) 35 | Found: 42101 / 45789 (91.95%) 36 | remember to delete temp dir: isec_bub_joint_links 37 | -------------------------------------------------------------------------------- /scripts/seq2pdf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Exit immediately if a command exits with a non-zero status. 4 | set -euo pipefail 5 | set +o posix 6 | 7 | cmd=$0 8 | 9 | function usage { 10 | >&2 echo "usage $cmd [--simplify|--dot] <kmer> <in.fa> [...]" 11 | >&2 echo " prints pdf to stdout, so please remember to redirect" 12 | >&2 echo " e.g. $cmd 5 <(echo ACAACACGT) <(echo CCACACAA) > out.pdf" 13 | exit -1 14 | } 15 | 16 | script_args= 17 | mkpdf=1 18 | 19 | while [[ $# -gt 0 && $1 == --* ]] # consume leading options only; kmer and inputs follow 20 | do 21 | if [[ ($1 == "--simplify") ]] 22 | then 23 | script_args=$1 24 | shift 25 | elif [[ $1 == "--dot" ]] 26 | then 27 | mkpdf=0 28 | shift 29 | else 30 | usage 31 | fi 32 | done 33 | 34 | if [[ $# -lt 2 || !( $1 =~ ^[0-9]+$ ) ]] # need a numeric kmer plus at least one input 35 | then 36 | usage 37 | fi 38 | 39 | kmer=$1 40 | shift 41 | 42 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )" 43 | MCCORTEX="$DIR/bin/mccortex" 44 | CTX2GRAPHVIZ="$DIR/scripts/perl/mccortex-graph-to-graphviz.pl" 45 | if [[ !(-e $MCCORTEX) || !(-x $MCCORTEX) ]] 46 | then 47 | >&2 echo "Did you compile McCortex? 
I cannot run '$MCCORTEX'" 48 | exit -1 49 | fi 50 | 51 | files=$(printf " --seq %s" $@; printf "\n") 52 | 53 | if [[ $mkpdf == 1 ]]; then 54 | $MCCORTEX $kmer build -q -k $kmer --sample seq2pdf $files - | \ 55 | $CTX2GRAPHVIZ -k $kmer $script_args - | \ 56 | dot -Tpdf 57 | else 58 | $MCCORTEX $kmer build -q -k $kmer --sample seq2pdf $files - | \ 59 | $CTX2GRAPHVIZ -k $kmer $script_args - 60 | fi 61 | -------------------------------------------------------------------------------- /src/graph/db_unitig.h: -------------------------------------------------------------------------------- 1 | #ifndef DB_UNITIG_H_ 2 | #define DB_UNITIG_H_ 3 | 4 | #include "cortex_types.h" 5 | #include "db_graph.h" 6 | #include "db_node.h" 7 | 8 | // Orient unitig 9 | // Once oriented, unitig has lowest possible kmerkey at the beginning, 10 | // oriented FORWARDs if possible 11 | void db_unitig_normalise(dBNode *nlist, size_t len, const dBGraph *db_graph); 12 | 13 | // Extend a unitig, nlist[offset] and olist[offset] must already be set 14 | // Walk along nodes starting from node/or, storing the unitig in nlist/olist 15 | // Returns the number of nodes added, adds no more than `limit` 16 | // return false if out of space and limit > 0 17 | bool db_unitig_extend(dBNodeBuffer *nbuf, size_t limit, 18 | const dBGraph *db_graph); 19 | 20 | // Fills with unitig that contains hkey 21 | // Does not reset nbuf 22 | void db_unitig_fetch(hkey_t node, dBNodeBuffer *nbuf, const dBGraph *db_graph); 23 | 24 | // Count number of read starts using coverage data 25 | size_t db_unitig_read_starts(const Covg *covgs, size_t len); 26 | size_t db_unitig_covg_mean(const Covg *covgs, size_t len); 27 | 28 | /** 29 | * @param visited must be initialised to zero, will be dirty upon return 30 | **/ 31 | void db_unitigs_iterate(size_t nthreads, uint8_t *visited, 32 | const dBGraph *db_graph, 33 | void (*func)(dBNodeBuffer nbuf, size_t threadid, void *arg), 34 | void *arg); 35 | 36 | #endif /* DB_UNITIG_H_ */ 37 | 
-------------------------------------------------------------------------------- /results/klebsiella/kleb_pneumoniae/platypus/call-platypus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eou pipefail 4 | set -o xtrace 5 | 6 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fa 7 | BAM=../remap/mapped/KlebPneu.bam 8 | BAMRMDUP=../remap/mapped/KlebPneu.rmdup.bam 9 | 10 | CTXDIR=~/mccortex 11 | VCF_ADD_CONTIGS=$CTXDIR/libs/biogrok/vcf-add-contigs 12 | VCF_PASS=$CTXDIR/libs/biogrok/vcf-pass 13 | 14 | PLATDIR=~/bioinf/Platypus 15 | 16 | source $PLATDIR/prepare.sh 17 | python $PLATDIR/bin/Platypus.py callVariants --logFileName platypus.rmdup.log \ 18 | --output=platypus.rmdup.vcf \ 19 | --refFile=$REF --bamFiles=$BAMRMDUP >& platypus.rmdup.vcf.log 20 | 21 | python $PLATDIR/bin/Platypus.py callVariants --logFileName platypus.assem.log \ 22 | --output=platypus.assem.vcf --assemble=1 \ 23 | --refFile=$REF --bamFiles=$BAMRMDUP >& platypus.assem.vcf.log 24 | 25 | # Add contigs to header 26 | $VCF_ADD_CONTIGS <(dnacat --lengths $REF) KlebPneu_MGH_78578 platypus.rmdup.vcf | \ 27 | $BCFTOOLS norm --check-ref x -m -any --fasta-ref $REF --site-win 5000 | \ 28 | $BCFTOOLS norm --rm-dup any --do-not-normalize | \ 29 | $VCF_PASS > platypus.vcf 30 | $BGZIP platypus.vcf 31 | $BCFTOOLS index platypus.vcf.gz 32 | 33 | # Analysis 34 | rm -rf mummer_isec mapping_truth cortex.k31.k61.{mapping,isec}.log 35 | ./analysis.sh >& analysis.log 36 | -------------------------------------------------------------------------------- /tests/sort/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash -euo pipefail 2 | 3 | # Any kmer (K=) should work 4 | 5 | K=51 6 | CTXDIR=../.. 
7 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1]) 8 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat 9 | 10 | GRAPHS=seq.fa graph.k$(K).ctx build.then.sort.k$(K).ctx build.and.sort.k$(K).ctx 11 | MISC=kmers.sorted.k$(K).txt build.then.sort.k$(K).ctx.idx 12 | LOGS=$(addsuffix .log,$(GRAPHS) $(MISC)) 13 | 14 | all: title $(GRAPHS) $(MISC) check 15 | 16 | title: 17 | @echo "-- Testing sort k=$(K) --" 18 | 19 | clean: 20 | rm -rf $(GRAPHS) $(MISC) $(LOGS) 21 | 22 | seq.fa: 23 | $(DNACAT) -F -n 100 > $@ 24 | 25 | graph.k$(K).ctx: seq.fa 26 | $(MCCORTEX) build -k $(K) --sample Jimmy --seq $< $@ >& $@.log 27 | $(MCCORTEX) check -q $@ 28 | 29 | build.then.sort.k$(K).ctx: graph.k$(K).ctx 30 | $(MCCORTEX) sort -o $@ $< >& $@.log 31 | $(MCCORTEX) check -q $@ 32 | 33 | build.and.sort.k$(K).ctx: seq.fa 34 | $(MCCORTEX) build -k $(K) --sort --sample Jimmy --seq $< $@ >& $@.log 35 | $(MCCORTEX) check -q $@ 36 | 37 | %.ctx.idx: %.ctx 38 | $(MCCORTEX) index --out $@ --block-kmers 11 $< >& $@.log 39 | 40 | kmers.sorted.k$(K).txt: graph.k$(K).ctx 41 | $(MCCORTEX) view -q --kmers $< | sort > $@ 42 | 43 | check: kmers.sorted.k$(K).txt build.then.sort.k$(K).ctx build.and.sort.k$(K).ctx 44 | diff -q $< <($(MCCORTEX) view -q -k build.then.sort.k$(K).ctx) 45 | diff -q $< <($(MCCORTEX) view -q -k build.and.sort.k$(K).ctx) 46 | 47 | .PHONY: all clean check title 48 | -------------------------------------------------------------------------------- /tests/vcfcov/calls2/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash -euo pipefail 2 | 3 | # 4 | # Test vcfcov with too many overlapping variants 5 | # 6 | # Test VCF of a SNP per base generated with: 7 | # ./fake-vcf.py ../ref/ref.fa > calls.vcf 8 | # 9 | 10 | K=21 11 | CTXDIR=../../.. 
12 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K) 13 | VCFENTRIES=$(CTXDIR)/libs/biogrok/vcf-entries 14 | 15 | REF=../ref/ref.fa 16 | 17 | all: test 18 | 19 | clean: 20 | rm -rf calls.cov.vcf* lowmem.cov.vcf* graph.k$(K).ctx *.log 21 | 22 | calls.cov.vcf.log: calls.cov.vcf 23 | calls.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx 24 | $(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --max-nvars 4 --high-mem calls.vcf graph.k$(K).ctx >& $@.log 25 | 26 | lowmem.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx 27 | $(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --max-nvars 4 --low-mem calls.vcf graph.k$(K).ctx >& $@.log 28 | 29 | graph.k$(K).ctx: sample.fa 30 | $(MCCORTEX) build -m 10M -k $(K) --sample John --seq sample.fa $@ >& $@.log 31 | 32 | test: calls.cov.vcf lowmem.cov.vcf truth.cov.vcf calls.cov.vcf.log 33 | diff -q <($(VCFENTRIES) calls.cov.vcf) <($(VCFENTRIES) truth.cov.vcf) 34 | diff -q <($(VCFENTRIES) lowmem.cov.vcf) <($(VCFENTRIES) truth.cov.vcf) 35 | @echo "=> VCF files match." 36 | [[ `grep -o 'max alleles in buffer:.*' calls.cov.vcf.log | grep -o '[0-9][0-9]*'` -lt 50 ]] 37 | @echo "=> Buffer kept below 50 VCF entries." 38 | 39 | view: calls.cov.vcf truth.cov.vcf 40 | gzip -fcd calls.cov.vcf 41 | gzip -fcd truth.cov.vcf 42 | 43 | .PHONY: all clean view test 44 | -------------------------------------------------------------------------------- /src/basic/seq_loading_stats.h: -------------------------------------------------------------------------------- 1 | #ifndef SEQ_LOADING_STATS_H_ 2 | #define SEQ_LOADING_STATS_H_ 3 | 4 | // Structure for statistics on loading sequence and cortex binary files 5 | typedef struct 6 | { 7 | // num_se_reads includes good reads, bad reads and duplicates etc. 
8 | size_t num_se_reads, num_pe_reads; 9 | size_t num_good_reads, num_bad_reads, num_dup_se_reads, num_dup_pe_pairs; 10 | size_t total_bases_read, total_bases_loaded; 11 | size_t contigs_parsed, num_kmers_parsed, num_kmers_loaded, num_kmers_novel; 12 | uint64_t *col_nkmers, *col_sum_covgs; 13 | size_t ncols; // max number of colours loaded 14 | } SeqLoadingStats; 15 | 16 | #define SEQ_LOADING_STATS_INIT (SeqLoadingStats){ \ 17 | .num_se_reads = 0, .num_pe_reads = 0, \ 18 | .num_good_reads = 0, .num_bad_reads = 0, \ 19 | .num_dup_se_reads = 0, .num_dup_pe_pairs = 0, \ 20 | .total_bases_read = 0, .total_bases_loaded = 0, \ 21 | .contigs_parsed = 0, .num_kmers_parsed = 0, \ 22 | .num_kmers_loaded = 0, .num_kmers_novel = 0, \ 23 | .col_nkmers = NULL, .col_sum_covgs = NULL, \ 24 | .ncols = 0 \ 25 | } 26 | 27 | // Functions for dealing with file loading statistics 28 | #define seq_loading_stats_init(s) memset(s, 0, sizeof(SeqLoadingStats)) 29 | void seq_loading_stats_merge(SeqLoadingStats *dst, const SeqLoadingStats *src); 30 | 31 | // @ht_num_kmers is the number of kmers loaded into the graph 32 | void seq_loading_stats_print(const SeqLoadingStats *stats, size_t ht_num_kmers); 33 | 34 | #endif /* SEQ_LOADING_STATS_H_ */ 35 | --------------------------------------------------------------------------------