├── tests
    ├── vcfcov
    │   ├── calls2
    │   │   ├── sample.fa
    │   │   └── Makefile
    │   ├── calls0
    │   │   ├── calls.vcf
    │   │   ├── truth.cov.vcf
    │   │   └── Makefile
    │   ├── ref
    │   │   └── ref.fa
    │   ├── calls3
    │   │   ├── hardy.fa
    │   │   ├── laurel.fa
    │   │   └── calls.vcf
    │   ├── calls4
    │   │   ├── calls.vcf
    │   │   ├── pluto.fa
    │   │   └── truth.cov.vcf
    │   ├── calls5
    │   │   ├── calls.vcf
    │   │   └── truth.cov.vcf
    │   ├── calls1
    │   │   ├── jane.fa
    │   │   ├── calls.vcf
    │   │   ├── john.fa
    │   │   └── Makefile
    │   └── Makefile
    ├── bubbles
    │   ├── bubbles5
    │   │   └── truth.vcf
    │   ├── Makefile
    │   └── bubbles2
    │   │   └── Makefile
    ├── pjoin
    │   ├── Makefile
    │   ├── pjoin1
    │   │   └── Makefile
    │   └── pjoin0
    │   │   └── Makefile
    ├── pop_bubbles
    │   ├── Makefile
    │   ├── pop_bubbles1
    │   │   └── Makefile
    │   └── pop_bubbles2
    │   │   └── Makefile
    ├── build
    │   └── Makefile
    ├── clean_graph
    │   ├── Makefile
    │   └── clean2
    │   │   └── Makefile
    ├── lossless
    │   └── Makefile
    ├── breakpoint
    │   ├── Makefile
    │   ├── breakpoint0
    │   │   └── Makefile
    │   └── breakpoint2
    │   │   └── Makefile
    ├── threading
    │   ├── Makefile
    │   ├── threading1
    │   │   └── Makefile
    │   └── threading3
    │   │   └── Makefile
    ├── subgraph_unitigs
    │   └── Makefile
    ├── coverage
    │   └── Makefile
    ├── largeK
    │   └── Makefile
    ├── graphviz
    │   └── Makefile
    ├── run.sh
    ├── path_check
    │   └── Makefile
    ├── unitigs
    │   └── Makefile
    └── sort
    │   └── Makefile
├── scripts
    ├── bash
    │   ├── links-count.sh
    │   ├── links-bytes.sh
    │   ├── links-median-threshold.sh
    │   ├── mccortex-to-ray.sh
    │   └── vcf-longest-haplotype.sh
    ├── build
    │   ├── multik-build.sh
    │   ├── update.sh
    │   └── mccortex
    ├── cortex_print_flanks.sh
    ├── R
    │   ├── install-deps.R
    │   ├── plot-length-hist.R
    │   └── plot-link-dist-cov.R
    ├── analysis
    │   ├── mummer2vcf.sh
    │   ├── sam-count-vars.pl
    │   └── mapping-vars-test.sh
    ├── perl
    │   ├── LineReader.pm
    │   ├── mccortex-header.pl
    │   ├── bubbles-example.pl
    │   ├── bubbles-to-contigs.pl
    │   └── breakpoints-example.pl
    ├── calculations
    │   ├── c-within-rand-a-b-bounds.pl
    │   └── bloom-filter-fpr.pl
    ├── report
    │   ├── make-kmer-plot.sh
    │   └── make-link-plot.sh
    ├── make-isec.sh
    └── seq2pdf.sh
├── results
    ├── data
    │   ├── download.sh
    │   ├── ecoli
    │   │   ├── README.txt
    │   │   └── Makefile
    │   ├── PhiX
    │   │   └── about.txt
    │   └── chr22
    │   │   ├── uniq_flanks
    │   │       ├── README.txt
    │   │       └── Makefile
    │   │   ├── about.txt
    │   │   └── Makefile
    ├── kmer_size_experiment
    │   ├── results
    │   │   ├── 20170206mon_chr22_28M
    │   │   │   ├── err-corr-plots
    │   │   │   │   └── Makefile
    │   │   │   ├── stoch.pdf
    │   │   │   ├── perfect.pdf
    │   │   │   ├── stocherr.pdf
    │   │   │   ├── plain-vs-pe.pdf
    │   │   │   ├── linkcounts.pe.pdf
    │   │   │   ├── linkcounts.se.pdf
    │   │   │   ├── pe-vs-sga-errs.pdf
    │   │   │   ├── pe-vs-sga-ng50.pdf
    │   │   │   ├── perfect_no_pe.pdf
    │   │   │   ├── plain-vs-links.pdf
    │   │   │   ├── stocherrcorr.pdf
    │   │   │   ├── plain-vs-pe-corr.pdf
    │   │   │   ├── corr-pe-vs-sga-errs.pdf
    │   │   │   ├── corr-pe-vs-sga-ng50.pdf
    │   │   │   ├── links-vs-sga-errs.pdf
    │   │   │   ├── links-vs-sga-ng50.pdf
    │   │   │   ├── plain-vs-links-corr.pdf
    │   │   │   ├── stoch.linkcounts.pe.pdf
    │   │   │   ├── stoch.linkcounts.se.pdf
    │   │   │   ├── perfect.linkcounts.pe.pdf
    │   │   │   ├── perfect.linkcounts.se.pdf
    │   │   │   ├── corr-links-vs-sga-errs.pdf
    │   │   │   ├── corr-links-vs-sga-ng50.pdf
    │   │   │   ├── corr-pe-vs-raw-sga-errs.pdf
    │   │   │   ├── corr-pe-vs-raw-sga-ng50.pdf
    │   │   │   ├── stoch.plain.csv
    │   │   │   ├── stocherr.linkcounts.pe.pdf
    │   │   │   ├── stocherr.linkcounts.se.pdf
    │   │   │   ├── corr-links-vs-raw-sga-errs.pdf
    │   │   │   ├── corr-links-vs-raw-sga-ng50.pdf
    │   │   │   ├── perfect.links.csv
    │   │   │   ├── perfect.pe.csv
    │   │   │   ├── perfect.plain.csv
    │   │   │   ├── stoch.links.csv
    │   │   │   ├── stocherr.pe.csv
    │   │   │   ├── stocherr.plain.csv
    │   │   │   ├── stocherrcorr.linkcounts.pe.pdf
    │   │   │   ├── stocherrcorr.linkcounts.se.pdf
    │   │   │   ├── stoch.pe.csv
    │   │   │   ├── stocherr.links.csv
    │   │   │   ├── stocherr.sga.csv
    │   │   │   ├── stocherrcorr.pe.csv
    │   │   │   ├── stocherrcorr.plain.csv
    │   │   │   ├── stocherrcorr.links.csv
    │   │   │   ├── stocherrcorr.sga.csv
    │   │   │   ├── perfect.linkcounts.se.csv
    │   │   │   ├── stoch.linkcounts.se.csv
    │   │   │   ├── stoch.linkcounts.pe.csv
    │   │   │   ├── stocherr.linkcounts.pe.csv
    │   │   │   ├── stocherr.linkcounts.se.csv
    │   │   │   ├── stocherrcorr.linkcounts.se.csv
    │   │   │   ├── perfect.linkcounts.pe.csv
    │   │   │   ├── stocherrcorr.linkcounts.pe.csv
    │   │   │   ├── bad.edges.csv
    │   │   │   ├── cleaning.corr.table.csv
    │   │   │   └── cleaning.table.csv
    │   │   ├── 20161012wed
    │   │   │   ├── stoch.pdf
    │   │   │   ├── perfect.pdf
    │   │   │   ├── stocherr.pdf
    │   │   │   ├── perfect_nope.pdf
    │   │   │   ├── stocherrcorr.pdf
    │   │   │   ├── perfect.plain.csv
    │   │   │   ├── stoch.plain.csv
    │   │   │   ├── stocherr.pe.csv
    │   │   │   ├── stocherr.plain.csv
    │   │   │   ├── perfect.links.csv
    │   │   │   ├── stoch.links.csv
    │   │   │   ├── stoch.pe.csv
    │   │   │   ├── stocherr.links.csv
    │   │   │   ├── stocherrcorr.plain.csv
    │   │   │   ├── perfect.pe.csv
    │   │   │   ├── stocherrcorr.links.csv
    │   │   │   ├── stocherrcorr.pe.csv
    │   │   │   ├── cleaning.corr.table.csv
    │   │   │   ├── cleaning.table.csv
    │   │   │   └── bad.edges.csv
    │   │   ├── 20160912mon
    │   │   │   ├── stoch.cov.pdf
    │   │   │   ├── perfect.cov.pdf
    │   │   │   ├── stocherr.cov.pdf
    │   │   │   ├── perfect.plain.csv
    │   │   │   ├── stoch.plain.csv
    │   │   │   ├── perfect.links.csv
    │   │   │   ├── stoch.links.csv
    │   │   │   ├── stocherr.plain.csv
    │   │   │   ├── stocherr.links.csv
    │   │   │   ├── notes.txt
    │   │   │   └── seqn.errors.csv
    │   │   ├── 20170211sat_chr22_28M_nomissing
    │   │   │   ├── README.md
    │   │   │   ├── stoch.pdf
    │   │   │   ├── perfect.pdf
    │   │   │   ├── stocherr.pdf
    │   │   │   ├── plain-vs-pe.pdf
    │   │   │   ├── pe-vs-sga-errs.pdf
    │   │   │   ├── pe-vs-sga-ng50.pdf
    │   │   │   ├── perfect_no_pe.pdf
    │   │   │   ├── plain-vs-links.pdf
    │   │   │   ├── stocherrcorr.pdf
    │   │   │   ├── plain-vs-pe-corr.pdf
    │   │   │   ├── corr-pe-vs-sga-errs.pdf
    │   │   │   ├── corr-pe-vs-sga-ng50.pdf
    │   │   │   ├── links-vs-sga-errs.pdf
    │   │   │   ├── links-vs-sga-ng50.pdf
    │   │   │   ├── plain-vs-links-corr.pdf
    │   │   │   ├── stoch.plain.csv
    │   │   │   ├── perfect.links.csv
    │   │   │   ├── perfect.pe.csv
    │   │   │   ├── perfect.plain.csv
    │   │   │   ├── stoch.links.csv
    │   │   │   ├── stocherr.plain.csv
    │   │   │   ├── corr-links-vs-sga-errs.pdf
    │   │   │   ├── corr-links-vs-sga-ng50.pdf
    │   │   │   ├── stoch.pe.csv
    │   │   │   ├── stocherr.links.csv
    │   │   │   ├── stocherr.pe.csv
    │   │   │   ├── stocherr.sga.csv
    │   │   │   ├── stocherrcorr.plain.csv
    │   │   │   ├── stocherrcorr.sga.csv
    │   │   │   ├── stocherrcorr.links.csv
    │   │   │   ├── stocherrcorr.pe.csv
    │   │   │   ├── bad.edges.csv
    │   │   │   ├── cleaning.corr.table.csv
    │   │   │   └── cleaning.table.csv
    │   │   ├── 20160929thurs
    │   │   │   ├── stoch.cov.pdf
    │   │   │   ├── perfect.cov.pdf
    │   │   │   ├── stocherr.cov.pdf
    │   │   │   ├── perfect_no_pe.pdf
    │   │   │   ├── stoch.plain.csv
    │   │   │   ├── stocherr.plain.csv
    │   │   │   ├── perfect.plain.csv
    │   │   │   ├── stoch.links.csv
    │   │   │   ├── stoch.pe.csv
    │   │   │   ├── stocherr.links.csv
    │   │   │   ├── stocherr.pe.csv
    │   │   │   ├── perfect.links.csv
    │   │   │   ├── perfect.pe.csv
    │   │   │   ├── cleaning.table.csv
    │   │   │   └── bad.edges.csv
    │   │   ├── 20170211sat_chr22_28M_confidstep
    │   │   │   ├── README.md
    │   │   │   ├── stoch.pdf
    │   │   │   ├── perfect.pdf
    │   │   │   ├── stocherr.pdf
    │   │   │   ├── perfect_no_pe.pdf
    │   │   │   ├── plain-vs-pe.pdf
    │   │   │   ├── stocherrcorr.pdf
    │   │   │   ├── pe-vs-sga-errs.pdf
    │   │   │   ├── pe-vs-sga-ng50.pdf
    │   │   │   ├── plain-vs-links.pdf
    │   │   │   ├── links-vs-sga-errs.pdf
    │   │   │   ├── links-vs-sga-ng50.pdf
    │   │   │   ├── plain-vs-pe-corr.pdf
    │   │   │   ├── stoch.plain.csv
    │   │   │   ├── corr-pe-vs-sga-errs.pdf
    │   │   │   ├── corr-pe-vs-sga-ng50.pdf
    │   │   │   ├── perfect.links.csv
    │   │   │   ├── perfect.plain.csv
    │   │   │   ├── plain-vs-links-corr.pdf
    │   │   │   ├── stoch.links.csv
    │   │   │   ├── stocherr.plain.csv
    │   │   │   ├── corr-links-vs-sga-errs.pdf
    │   │   │   ├── corr-links-vs-sga-ng50.pdf
    │   │   │   ├── perfect.pe.csv
    │   │   │   ├── stoch.pe.csv
    │   │   │   ├── stocherr.pe.csv
    │   │   │   ├── stocherrcorr.plain.csv
    │   │   │   ├── stocherr.links.csv
    │   │   │   ├── stocherr.sga.csv
    │   │   │   ├── stocherrcorr.sga.csv
    │   │   │   ├── stocherrcorr.links.csv
    │   │   │   ├── stocherrcorr.pe.csv
    │   │   │   ├── bad.edges.csv
    │   │   │   ├── cleaning.corr.table.csv
    │   │   │   └── cleaning.table.csv
    │   │   ├── make-csv.sh
    │   │   ├── count-links.pl
    │   │   └── plot-link-counts.R
    │   ├── notes.txt
    │   └── sga-all-kmers.sh
    ├── file_buffering
    │   ├── results20150413mon.mac.csv
    │   ├── results20150413mon.mac.txt
    │   ├── README.txt
    │   └── file-buffering.sh
    ├── klebsiella
    │   └── kleb_pneumoniae
    │   │   ├── indels
    │   │       └── about.txt
    │   │   ├── assembly
    │   │       └── get-max-covg.sh
    │   │   ├── freebayes
    │   │       ├── analysis.sh
    │   │       └── freebayes.sh
    │   │   ├── platypus
    │   │       ├── analysis.sh
    │   │       └── call-platypus.sh
    │   │   ├── cortex
    │   │       └── analysis.sh
    │   │   ├── mcrun
    │   │       └── analysis.sh
    │   │   └── large_events
    │   │       └── large-events-plot.R
    ├── benchmark
    │   ├── 10diploid10X
    │   │   ├── run-sim.sh
    │   │   └── smaller.fa
    │   ├── minidiploid
    │   │   └── run-sim.sh
    │   └── diploid60X
    │   │   └── run-sim.sh
    ├── hash_table_benchmark
    │   ├── results20150409thurs.mac.txt
    │   ├── stats.R
    │   └── results20150409thurs.linux.txt
    ├── correct
    │   └── hg_chr22
    │   │   └── README.txt
    ├── README.txt
    ├── contig_confidence
    │   ├── Makefile
    │   └── confidence.tex
    ├── traversal_conjecture
    │   └── README.txt
    ├── var_calling_10ecoli
    │   ├── results
    │   │   ├── 20150510_sun_initrun
    │   │   │   └── 20150510.sun.txt.4
    │   │   └── 20150615_joint_1by1_links_plain
    │   │   │   └── 20150617.wed.stats.txt
    │   └── about.txt
    └── var_calling_diploid_chr22_1Mbp
    │   └── about.txt
├── libs
    ├── misc
    │   ├── README.md
    │   ├── mem_size.h
    │   ├── Makefile
    │   └── jenkins.h
    ├── maximal_substrs
    │   └── Makefile
    └── cJSON
    │   ├── tests
    │       ├── test2
    │       ├── test1
    │       ├── test3
    │       └── test5
    │   └── LICENSE
├── .gitignore
├── AUTHORS
├── src
    ├── kmer
    │   ├── kmer_size.h
    │   └── kmer_size.c
    ├── basic
    │   ├── str_parsing.h
    │   ├── common_buffers.h
    │   ├── str_parsing.c
    │   ├── hash_mem.h
    │   ├── decomp_breakpoint.h
    │   ├── hash.h
    │   ├── decomp_bubble.h
    │   ├── range.h
    │   ├── chrom_pos_list.h
    │   ├── graph_info.h
    │   └── seq_loading_stats.h
    ├── tools
    │   ├── infer_edges.h
    │   ├── correct_reads.h
    │   ├── pop_bubbles.h
    │   ├── vcf_coverage.h
    │   └── generate_paths.h
    ├── graph
    │   ├── graph_format.h
    │   ├── graph_search.h
    │   ├── prune_nodes.h
    │   ├── graph_format.c
    │   ├── graph_step.c
    │   ├── contig_confidence.h
    │   └── db_unitig.h
    ├── global
    │   ├── cortex_types.h
    │   ├── global.c
    │   └── ctx_assert.c
    ├── paths
    │   └── gpath_follow.h
    └── alignment
    │   └── correct_aln_input.h
├── dev
    └── bkmer_revcmp
    │   └── Makefile
├── travis
    ├── script.sh
    ├── install.sh
    └── provision-vm.sh
├── LICENSE
└── .travis.yml


/tests/vcfcov/calls2/sample.fa:
--------------------------------------------------------------------------------
1 | >ref
2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGG
3 | 


--------------------------------------------------------------------------------
/scripts/bash/links-count.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Print the number of lines in the file $1 that start with "F " or "R ".
3 | # Note: grep -c exits 1 when the count is 0, so under errexit/pipefail this
4 | # script prints 0 and then exits non-zero in that case.
5 | set -eou pipefail
6 | 
7 | # Quote the path so names containing spaces or glob characters stay intact.
8 | zcat -fcd "$1" | grep -c '^[FR] '
9 | 


--------------------------------------------------------------------------------
/results/data/download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -euo pipefail
2 | 
3 | for d in chr22 ecoli
4 | do
5 |   cd $d
6 |   make
7 |   cd ..
8 | done
9 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/err-corr-plots/Makefile:
--------------------------------------------------------------------------------
1 | # Default target: run pdflatex on errcorr.
2 | .PHONY: all
3 | 
4 | all:
5 | 	pdflatex errcorr


--------------------------------------------------------------------------------
/scripts/build/multik-build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run `make` once per MAXK value, forwarding this script's arguments to make.
3 | set -euo pipefail
4 | 
5 | for k in 31 63 95 127
6 | do
7 |   make MAXK="$k" "$@"   # "$@" keeps each forwarded argument as one word
8 | done
9 | 


--------------------------------------------------------------------------------
/scripts/bash/links-bytes.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Sum int(($2+3)/4) over every line of $1 that starts with "F " or "R " and
3 | # print the total. Prints 0 (and exits 0) when no line matches.
4 | # NOTE(review): (n+3)/4 looks like bytes needed for n 2-bit values - confirm.
5 | set -eou pipefail
6 | 
7 | # The filter lives in awk so a no-match input does not trip pipefail via grep;
8 | # x+0 forces numeric output when x was never assigned.
9 | zcat -fcd "$1" | awk '/^[FR] /{x+=int(($2+3)/4)} END{print x+0}'


--------------------------------------------------------------------------------
/results/data/ecoli/README.txt:
--------------------------------------------------------------------------------
1 | 
2 | Ecoli K12 reference genome
3 | 
4 | http://www.ncbi.nlm.nih.gov/nuccore/NC_000913.3
5 | 
6 | Download with: `make`
7 | 
8 | 


--------------------------------------------------------------------------------
/results/file_buffering/results20150413mon.mac.csv:
--------------------------------------------------------------------------------
 1 | 0.312
 2 | 0.315
 3 | 0.314
 4 | 0.327
 5 | 0.328
 6 | 0.096
 7 | 0.097
 8 | 0.097
 9 | 0.098
10 | 0.102
11 | 


--------------------------------------------------------------------------------
/scripts/cortex_print_flanks.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For each header line matching >bubble.*.5pflank, print the header's first
3 | # field followed by the next line in full. Inputs are read with `gzip -fcd`.
4 | set -euo pipefail
5 | 
6 | # awk: `m` is set on a matching header so the following line is printed whole.
7 | gzip -fcd "$@" | awk -F '[ \t]' 'm{print $0;m=0;} /^>bubble\..*\.5pflank/{print $1; m=1;}'
8 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stoch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/stoch.pdf


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/indels/about.txt:
--------------------------------------------------------------------------------
1 | Isaac Turner
2 | 2015-12-21
3 | 
4 | Plot indel distribution from freebayes, platypus, cortex, mccortex
5 | 
6 | ./run.sh
7 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/stoch.cov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160912mon/stoch.cov.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/perfect.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/perfect.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/stocherr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Human `chr22:28,000,000-28,999,999`, contigs assembled with the `--no-missing-check` argument.
3 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/perfect.cov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160912mon/perfect.cov.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stoch.cov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/stoch.cov.pdf


--------------------------------------------------------------------------------
/libs/misc/README.md:
--------------------------------------------------------------------------------
1 | Miscellaneous third-party code used in McCortex
2 | 
3 | CityHash is written in C++ so we pull in a re-written version in C from:
4 | https://github.com/nusov/cityhash-c
5 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/stocherr.cov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160912mon/stocherr.cov.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/perfect.cov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/perfect.cov.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stocherr.cov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/stocherr.cov.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/perfect_nope.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/perfect_nope.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherrcorr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20161012wed/stocherrcorr.pdf


--------------------------------------------------------------------------------
/results/file_buffering/results20150413mon.mac.txt:
--------------------------------------------------------------------------------
1 | Reading: results20150413mon.csv 
2 | Rows: 10 
3 | 1 [1:5] mean: 0.3192 stddev: 0.007661593
4 | 2 [6:10] mean: 0.098 stddev: 0.002345208
5 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/perfect_no_pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20160929thurs/perfect_no_pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Human `chr22:28,000,000-28,999,999`, contigs assembled with the `--no-missing-check --confid-step 0.8` arguments.
3 | 


--------------------------------------------------------------------------------
/scripts/build/update.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Clean libs/ and the top-level build, pull the latest code and submodules,
 3 | # then rebuild.
 4 | set -oeu pipefail
 5 | 
 6 | # `make -C` replaces `cd libs && make clean && cd ..`: errexit does not fire
 7 | # for a failure left of the final `&&`, so a failed clean would skip `cd ..`
 8 | # and leave every later command running inside libs/.
 9 | make -C libs clean
10 | make clean
11 | git pull
12 | git submodule update --init --recursive
13 | make
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.se.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/linkcounts.se.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/pe-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect_no_pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect_no_pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe-corr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-pe-corr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.pdf


--------------------------------------------------------------------------------
/libs/maximal_substrs/Makefile:
--------------------------------------------------------------------------------
 1 | # Build the maxsubstrs binary. stream_buffer.h is a prerequisite only, so
 2 | # editing it triggers a rebuild; $< passes just maxsubstrs.c to $(CC).
 3 | .PHONY: all clean
 4 | 
 5 | # First real target, hence the default goal.
 6 | maxsubstrs: maxsubstrs.c stream_buffer.h
 7 | 	$(CC) -Wall -Wextra -o $@ $<
 8 | 
 9 | all: maxsubstrs
10 | 
11 | # Remove the built binary.
12 | clean:
13 | 	rm -rf maxsubstrs
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/links-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links-corr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/plain-vs-links-corr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.se.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.se.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.se.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.se.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-pe-vs-raw-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,0
 3 | 31,3161,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,16878,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.se.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.se.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect_no_pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect_no_pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/pe-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect_no_pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect_no_pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/perfect.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,969,0
 3 | 31,1729,0
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13480,0
 7 | 71,21068,0
 8 | 81,38623,0
 9 | 91,48050,0
10 | 99,52592,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/stoch.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,969,0
 3 | 31,1729,0
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13480,0
 7 | 71,21068,0
 8 | 81,38623,0
 9 | 91,18068,0
10 | 99,108,1
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stoch.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,969,0
 3 | 31,1729,0
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13480,0
 7 | 71,21068,0
 8 | 81,38623,0
 9 | 91,16634,0
10 | 99,108,5
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stocherr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,961,55
 3 | 31,1729,19
 4 | 41,2909,1
 5 | 51,5501,0
 6 | 61,13278,0
 7 | 71,21068,0
 8 | 81,11700,0
 9 | 91,138,1
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/perfect.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,969,0
 3 | 31,1729,0
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13480,0
 7 | 71,21068,0
 8 | 81,38623,0
 9 | 91,48050,0
10 | 99,52592,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stoch.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,969,0
 3 | 31,1729,0
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13480,0
 7 | 71,21068,0
 8 | 81,38623,0
 9 | 91,16634,0
10 | 99,108,5
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1343,127
 3 | 31,2755,83
 4 | 41,5821,53
 5 | 51,14476,22
 6 | 61,36204,7
 7 | 71,48050,1
 8 | 81,12589,0
 9 | 91,138,1
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,961,55
 3 | 31,1729,19
 4 | 41,2909,1
 5 | 51,5501,0
 6 | 61,13278,0
 7 | 71,21068,0
 8 | 81,11700,0
 9 | 91,138,1
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/corr-links-vs-raw-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,94840,0
 3 | 31,94840,0
 4 | 41,94840,0
 5 | 51,94840,0
 6 | 61,94840,0
 7 | 71,94840,0
 8 | 81,94840,0
 9 | 91,94840,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,22583,7
 3 | 31,52417,9
 4 | 41,76714,6
 5 | 51,85267,8
 6 | 61,125711,5
 7 | 71,125711,2
 8 | 81,125659,0
 9 | 91,123940,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,0
 3 | 31,3161,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,94391,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,94690,4
 3 | 31,94690,3
 4 | 41,94391,2
 5 | 51,94391,2
 6 | 61,94391,0
 7 | 71,94391,0
 8 | 81,94391,0
 9 | 91,17625,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2539,106
 3 | 31,5982,59
 4 | 41,13935,29
 5 | 51,73223,19
 6 | 61,59423,3
 7 | 71,67617,10
 8 | 81,14442,4
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1866,15
 3 | 31,3126,6
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,60471,0
 8 | 81,14272,0
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.pe.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.pe.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.se.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.se.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/pe-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe-corr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-pe-corr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/perfect.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,52592,1
 3 | 31,52592,1
 4 | 41,52592,1
 5 | 51,52592,0
 6 | 61,52592,0
 7 | 71,52592,0
 8 | 81,52592,0
 9 | 91,52592,0
10 | 99,52592,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/stoch.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,74568,6
 3 | 31,74568,4
 4 | 41,60396,3
 5 | 51,52592,0
 6 | 61,52592,0
 7 | 71,52592,0
 8 | 81,52592,0
 9 | 91,18978,0
10 | 99,108,1
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/stocherr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,957,40
 3 | 31,1729,1
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13480,0
 7 | 71,21068,0
 8 | 81,12431,0
 9 | 91,137,3
10 | 99,108675,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/perfect.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,969,0
 3 | 31,1729,0
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13480,0
 7 | 71,21068,0
 8 | 81,38623,0
 9 | 91,48050,0
10 | 99,52592,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stoch.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,74568,5
 3 | 31,74568,4
 4 | 41,74568,3
 5 | 51,52592,0
 6 | 61,52592,0
 7 | 71,52592,0
 8 | 81,52592,0
 9 | 91,17998,0
10 | 99,108,5
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stoch.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,42887,10
 3 | 31,68825,5
 4 | 41,93607,5
 5 | 51,102363,3
 6 | 61,164617,2
 7 | 71,181787,2
 8 | 81,228304,1
 9 | 91,23945,0
10 | 99,108,5
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stocherr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1552,135
 3 | 31,2655,86
 4 | 41,5732,71
 5 | 51,10941,20
 6 | 61,31864,5
 7 | 71,31864,1
 8 | 81,11749,0
 9 | 91,138,1
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/stocherr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1343,127
 3 | 31,2755,83
 4 | 41,5821,53
 5 | 51,14476,22
 6 | 61,36204,7
 7 | 71,48050,1
 8 | 81,12589,0
 9 | 91,138,1
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/perfect.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,52592,1
 3 | 31,52592,1
 4 | 41,52592,1
 5 | 51,52592,0
 6 | 61,52592,0
 7 | 71,52592,0
 8 | 81,52592,0
 9 | 91,52592,0
10 | 99,52592,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stoch.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,74568,5
 3 | 31,74568,4
 4 | 41,74568,3
 5 | 51,52592,0
 6 | 61,52592,0
 7 | 71,52592,0
 8 | 81,52592,0
 9 | 91,17998,0
10 | 99,108,5
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stoch.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,42887,10
 3 | 31,68825,5
 4 | 41,93607,5
 5 | 51,102363,3
 6 | 61,164617,2
 7 | 71,181787,2
 8 | 81,228304,1
 9 | 91,23945,0
10 | 99,108,5
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1552,135
 3 | 31,2655,86
 4 | 41,5732,71
 5 | 51,10941,20
 6 | 61,31864,5
 7 | 71,31864,1
 8 | 81,11749,0
 9 | 91,138,1
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherrcorr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,959,3
 3 | 31,1763,0
 4 | 41,2909,0
 5 | 51,5501,0
 6 | 61,13278,0
 7 | 71,21068,0
 8 | 81,38623,0
 9 | 91,10786,5
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,24439,10
 3 | 31,52419,18
 4 | 41,76714,12
 5 | 51,91821,18
 6 | 61,125711,10
 7 | 71,125711,4
 8 | 81,124051,2
 9 | 91,18437,1
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2609,101
 3 | 31,5690,58
 4 | 41,13546,39
 5 | 51,33392,27
 6 | 61,59423,10
 7 | 71,67616,1
 8 | 81,14272,1
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.sga.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2010,344
 3 | 31,3232,357
 4 | 41,4706,345
 5 | 51,7206,226
 6 | 61,12176,108
 7 | 71,38131,21
 8 | 81,93964,5
 9 | 91,16165,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2706,106
 3 | 31,4690,65
 4 | 41,12934,26
 5 | 51,58441,15
 6 | 61,74941,4
 7 | 71,89246,8
 8 | 81,120171,3
 9 | 91,13454,6
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,11
 3 | 31,3126,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,13382,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/links-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe-corr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-pe-corr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,0
 3 | 31,3161,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,16878,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-pe-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/links-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links-corr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/plain-vs-links-corr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,0
 3 | 31,3161,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,16878,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/stocherr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1335,89
 3 | 31,3512,55
 4 | 41,5173,70
 5 | 51,11842,28
 6 | 61,22376,8
 7 | 71,31866,0
 8 | 81,13029,0
 9 | 91,137,3
10 | 99,108675,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/perfect.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,52592,1
 3 | 31,52592,1
 4 | 41,52592,1
 5 | 51,52592,0
 6 | 61,52592,0
 7 | 71,52592,0
 8 | 81,52592,0
 9 | 91,52592,0
10 | 99,52592,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/perfect.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,42887,5
 3 | 31,56913,4
 4 | 41,68833,3
 5 | 51,102361,3
 6 | 61,164617,2
 7 | 71,181787,2
 8 | 81,181787,1
 9 | 91,228304,0
10 | 99,228304,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/perfect.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,42887,5
 3 | 31,56913,4
 4 | 41,68833,3
 5 | 51,102361,3
 6 | 61,164617,2
 7 | 71,181787,2
 8 | 81,181787,1
 9 | 91,228304,0
10 | 99,228304,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherrcorr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1826,97
 3 | 31,3025,85
 4 | 41,5580,76
 5 | 51,12459,26
 6 | 61,31866,13
 7 | 71,28039,1
 8 | 81,42610,0
 9 | 91,11077,5
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/stocherrcorr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1548,97
 3 | 31,2653,68
 4 | 41,6189,65
 5 | 51,15642,18
 6 | 61,34269,12
 7 | 71,39181,0
 8 | 81,50743,0
 9 | 91,11697,5
10 | 99,21,0
11 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2849,98
 3 | 31,4395,86
 4 | 41,13780,41
 5 | 51,41104,25
 6 | 61,75074,12
 7 | 71,75599,0
 8 | 81,94391,0
 9 | 91,13390,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.sga.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2010,736
 3 | 31,3176,765
 4 | 41,4032,735
 5 | 51,5845,357
 6 | 61,10533,137
 7 | 71,30368,24
 8 | 81,93964,7
 9 | 91,16165,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-pe-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,94392,0
 3 | 31,94392,0
 4 | 41,94392,0
 5 | 51,94392,0
 6 | 61,94392,0
 7 | 71,94392,0
 8 | 81,94392,0
 9 | 91,94392,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,0
 3 | 31,3161,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,94391,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links-corr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/plain-vs-links-corr.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,94392,5
 3 | 31,94392,4
 4 | 41,94391,3
 5 | 51,94391,2
 6 | 61,94391,2
 7 | 71,94391,1
 8 | 81,94391,1
 9 | 91,17625,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1866,15
 3 | 31,3126,6
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,60471,0
 8 | 81,14272,0
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,94840,0
 3 | 31,94840,0
 4 | 41,94840,0
 5 | 51,94840,0
 6 | 61,94840,0
 7 | 71,94840,0
 8 | 81,94840,0
 9 | 91,94840,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,22583,8
 3 | 31,52417,9
 4 | 41,76714,6
 5 | 51,85267,8
 6 | 61,125711,8
 7 | 71,125711,7
 8 | 81,125659,2
 9 | 91,123940,1
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/perfect.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,0
 3 | 31,3161,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,94391,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,94690,4
 3 | 31,94690,3
 4 | 41,94391,2
 5 | 51,94391,2
 6 | 61,94391,2
 7 | 71,94391,1
 8 | 81,94391,1
 9 | 91,17625,0
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1866,15
 3 | 31,3126,6
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,60471,0
 8 | 81,14272,0
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/corr-links-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/perfect.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,22583,7
 3 | 31,52417,9
 4 | 41,76714,6
 5 | 51,85267,8
 6 | 61,125711,8
 7 | 71,125711,7
 8 | 81,125659,2
 9 | 91,123940,1
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stoch.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,24439,8
 3 | 31,50982,19
 4 | 41,76714,12
 5 | 51,91821,18
 6 | 61,125711,16
 7 | 71,125711,14
 8 | 81,125659,5
 9 | 91,19076,3
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,6155,763
 3 | 31,7638,540
 4 | 41,25027,225
 5 | 51,74994,77
 6 | 61,75074,52
 7 | 71,72574,17
 8 | 81,14494,10
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,11
 3 | 31,3126,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,13382,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-errs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-errs.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-ng50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcveanlab/mccortex/HEAD/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/corr-links-vs-sga-ng50.pdf


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stoch.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,24439,10
 3 | 31,52419,14
 4 | 41,76714,12
 5 | 51,91821,18
 6 | 61,125711,16
 7 | 71,125711,14
 8 | 81,125659,5
 9 | 91,19076,3
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,5676,710
 3 | 31,7289,570
 4 | 41,18591,182
 5 | 51,44614,74
 6 | 61,67584,40
 7 | 71,75599,14
 8 | 81,14421,9
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,6248,745
 3 | 31,8251,536
 4 | 41,25027,225
 5 | 51,74994,76
 6 | 61,75074,48
 7 | 71,72574,17
 8 | 81,14494,10
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherr.sga.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2010,344
 3 | 31,3232,357
 4 | 41,4706,345
 5 | 51,7206,226
 6 | 61,12176,108
 7 | 71,38131,21
 8 | 81,93964,5
 9 | 91,16165,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.plain.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,1920,11
 3 | 31,3126,0
 4 | 41,5928,0
 5 | 51,12729,0
 6 | 61,27317,0
 7 | 71,71586,0
 8 | 81,75608,0
 9 | 91,13382,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,5555,737
 3 | 31,6687,548
 4 | 41,18591,182
 5 | 51,44614,73
 6 | 61,67584,41
 7 | 71,75599,14
 8 | 81,14421,9
 9 | 91,139,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherr.sga.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2010,344
 3 | 31,3232,357
 4 | 41,4706,345
 5 | 51,7206,226
 6 | 61,12176,108
 7 | 71,38131,21
 8 | 81,93964,5
 9 | 91,16165,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.sga.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2010,736
 3 | 31,3176,765
 4 | 41,4032,735
 5 | 51,5845,357
 6 | 61,10533,137
 7 | 71,30368,24
 8 | 81,93964,7
 9 | 91,16165,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.sga.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,2010,736
 3 | 31,3176,765
 4 | 41,4032,735
 5 | 51,5845,357
 6 | 61,10533,137
 7 | 71,30368,24
 8 | 81,93964,7
 9 | 91,16165,4
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,6630,558
 3 | 31,6976,586
 4 | 41,27260,147
 5 | 51,50329,84
 6 | 61,74915,45
 7 | 71,93763,16
 8 | 81,94391,5
 9 | 91,13390,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/stocherrcorr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,7439,560
 3 | 31,8287,518
 4 | 41,30490,168
 5 | 51,75065,90
 6 | 61,75501,44
 7 | 71,94643,17
 8 | 81,100768,18
 9 | 91,13644,13
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.links.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,4205,570
 3 | 31,6976,549
 4 | 41,18580,158
 5 | 51,50329,84
 6 | 61,74915,45
 7 | 71,93763,16
 8 | 81,94391,5
 9 | 91,13390,5
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/stocherrcorr.pe.csv:
--------------------------------------------------------------------------------
 1 | K,NG50,AssemblyErrors
 2 | 21,4136,542
 3 | 31,8355,524
 4 | 41,22086,206
 5 | 51,75065,97
 6 | 61,75501,44
 7 | 71,94643,17
 8 | 81,100768,18
 9 | 91,13644,13
10 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /bin
 2 | /build/
 3 | /libs
 4 | /docs
 5 | /src/global/version.h
 6 | /tests
 7 | /results
 8 | /benchmark
 9 | /dev
10 | *.RData
11 | *.Rhistory
12 | *.DS_Store
13 | *.pyc
14 | *.vagrant
15 | 
16 | /commit.txt
17 | /notes.txt
18 | /tests.txt
19 | 


--------------------------------------------------------------------------------
/results/data/PhiX/about.txt:
--------------------------------------------------------------------------------
1 | Illumina PhiX data
2 | 
3 | PhiX generated from: http://blog.basespace.illumina.com/2012/10/02/new-miseq-datasets/
4 | 
5 | zcat /data/illumina/PhiX_S1_L001_R1_001.fastq.gz | \
6 |   head -4000000 | tail -400000 | gzip -c > PhiX.100K.1.fq.gz
7 | 


--------------------------------------------------------------------------------
/tests/bubbles/bubbles5/truth.vcf:
--------------------------------------------------------------------------------
1 | ##fileformat=VCFv4.1
2 | ##fileDate=20151111
3 | ##reference=ref/ref.fa
4 | ##contig=<ID=ref,length=91>
5 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
6 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	GT	MrBond
7 | ref	49	.	T	A	.	PASS	.	GT	0/1
8 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | Isaac Turner (turner.isaac@gmail.com)
 2 | PhD Supervisor: Prof Gil McVean
 3 | 
 4 | cortex_var:
 5 | Zam Iqbal (zam@well.ox.ac.uk)
 6 | Mario Caccamo (mario.caccamo@bbsrc.ac.uk)
 7 | 
 8 | Bug reports, profiling, feedback, documentation etc.:
 9 | Kiran Garimella
10 | Torsten Seemann
11 | Jerome Kelleher
12 | 


--------------------------------------------------------------------------------
/libs/cJSON/tests/test2:
--------------------------------------------------------------------------------
 1 | {"menu": {
 2 |   "id": "file",
 3 |   "value": "File",
 4 |   "popup": {
 5 |     "menuitem": [
 6 |       {"value": "New", "onclick": "CreateNewDoc()"},
 7 |       {"value": "Open", "onclick": "OpenDoc()"},
 8 |       {"value": "Close", "onclick": "CloseDoc()"}
 9 |     ]
10 |   }
11 | }}
12 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls0/calls.vcf:
--------------------------------------------------------------------------------
1 | ##fileformat=VCFv4.2
2 | ##FILTER=<ID=PASS,Description="All filters passed">
3 | ##fileDate=20151014
4 | ##reference=ref/ref.fa
5 | ##contig=<ID=ref,length=200>
6 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
7 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	MasterGrunt
8 | 


--------------------------------------------------------------------------------
/results/file_buffering/README.txt:
--------------------------------------------------------------------------------
1 | Record time to read a sequence file buffered vs un-buffered
2 | 
3 | ./file-buffering.sh ../data/chr22/chr22.fa > results20150413mon.mac.csv
4 | ../hash_table_benchmark/stats.R results20150413mon.mac.csv > results20150413mon.mac.txt
5 | 
6 | Mac files generated on my laptop (MacBook Pro)
7 | 


--------------------------------------------------------------------------------
/src/kmer/kmer_size.h:
--------------------------------------------------------------------------------
 1 | #ifndef KMER_SIZE_H_
 2 | #define KMER_SIZE_H_
 3 | 
 4 | // Files that are not compiled with MIN_KMER_SIZE and MAX_KMER_SIZE link to
 5 | // this object file and discover kmer size limits at run time
 6 | 
 7 | // Declared with (void) so these are real prototypes: a call that passes
 8 | // arguments by mistake is rejected at compile time instead of being accepted
 9 | int get_min_kmer_size(void);
10 | int get_max_kmer_size(void);
11 | 
12 | #endif /* KMER_SIZE_H_ */
13 | 


--------------------------------------------------------------------------------
/src/basic/str_parsing.h:
--------------------------------------------------------------------------------
 1 | #ifndef STR_PARSING_H_
 2 | #define STR_PARSING_H_
 3 | 
 4 | #include "common_buffers.h"
 5 | 
 6 | // Parse a comma-separated list of numbers from `str`, e.g. "12,3,12"
 7 | // Returns <0 on error, otherwise the number of chars of `str` used
 8 | int comma_list_to_array(const char *str, SizeBuffer *nums);
 9 | 
10 | #endif /* STR_PARSING_H_ */
11 | 


--------------------------------------------------------------------------------
/src/kmer/kmer_size.c:
--------------------------------------------------------------------------------
 1 | #include "kmer_size.h"
 2 | 
 3 | // Files that are not compiled with MIN_KMER_SIZE and MAX_KMER_SIZE link to
 4 | // this object file and discover kmer size limits at run time
 5 | 
 6 | // Returns the MIN_KMER_SIZE value this object file was compiled with
 7 | int get_min_kmer_size(void)
 8 | {
 9 |   return MIN_KMER_SIZE;
10 | }
11 | 
12 | // Returns the MAX_KMER_SIZE value this object file was compiled with
13 | int get_max_kmer_size(void)
14 | {
15 |   return MAX_KMER_SIZE;
16 | }
17 | 


--------------------------------------------------------------------------------
/dev/bkmer_revcmp/Makefile:
--------------------------------------------------------------------------------
 1 | NWORDS=1
 2 | SHELL:=/bin/bash
 3 | 
 4 | all: revcmp
 5 | 
 6 | clean:
 7 | 	rm -rf revcmp
 8 | 
 9 | # NWORDS is passed to the compiler as NUM_BKMER_WORDS, e.g. `make NWORDS=2`
10 | revcmp: revcmp.c
11 | 	$(CC) -O4 -Wall -Wextra -DNUM_BKMER_WORDS=$(NWORDS) -o $@ $<
12 | 
13 | # Time ./revcmp with -m 0..4, five rounds of each ({a..b} ranges need bash).
14 | # Depends on revcmp so that `make profile` also works from a clean checkout.
15 | profile: revcmp
16 | 	for i in {1..5}; do for m in {0..4}; do time ./revcmp -m $$m -n 1000000000; done; done
17 | 
18 | .PHONY: all clean profile
19 | 


--------------------------------------------------------------------------------
/tests/pjoin/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # One sub-directory per test case; each is run with its own make
 4 | 
 5 | # pjoin0:
 6 | # pjoin1:
 7 | 
 8 | all:
 9 | 	$(MAKE) -C pjoin0
10 | 	$(MAKE) -C pjoin1
11 | 	@echo "All looks good."
12 | 
13 | clean:
14 | 	$(MAKE) -C pjoin0 clean
15 | 	$(MAKE) -C pjoin1 clean
16 | 
17 | .PHONY: all clean
18 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/make-csv.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -eou pipefail
 3 | 
 4 | # usage: make-csv.sh <file> [<file> ...]
 5 | # Prints a CSV header, then one "K,NG50,AssemblyErrors" row per input file.
 6 | # K is the number from the first 'k<digits>' found in the file's path; NG50
 7 | # and the error count are the trailing numbers on the file's 'NG50:' and
 8 | # 'assembly_errors:' lines.
 9 | # Because of `set -e` and pipefail, a file lacking any of these ends the script.
10 | 
11 | echo "K,NG50,AssemblyErrors"
12 | # "$@" and "$f" are quoted so that paths containing spaces or glob characters
13 | # are handled as a single file name
14 | for f in "$@"
15 | do
16 |   K=$(printf '%s\n' "$f" | grep -oE 'k[0-9]+' | grep -oE -m1 '[0-9]+$')
17 |   NG50=$(grep 'NG50:' "$f" | grep -oE '[0-9]+$')
18 |   ERRORS=$(grep 'assembly_errors:' "$f" | grep -oE '[0-9]+$')
19 |   echo "$K,$NG50,$ERRORS"
20 | done
21 | 


--------------------------------------------------------------------------------
/tests/vcfcov/ref/ref.fa:
--------------------------------------------------------------------------------
1 | >chr0
2 | ATATTGATCCCC
3 | >ref
4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
5 | >chr1
6 | TGGGCCAGTACGGTGAATCCCTGATGATCCGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG
7 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.se.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	943524	14735	498094	1.7MB
 3 | 31	973199	6547	63108	114.8KB
 4 | 41	985939	2981	11506	13.5KB
 5 | 51	991526	1325	3306	3.4KB
 6 | 61	994205	654	1324	1.3KB
 7 | 71	995708	349	554	554B
 8 | 81	996639	159	228	228B
 9 | 91	997273	62	74	74B
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.se.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	943521	14735	407180	1.3MB
 3 | 31	973196	6547	56366	100.6KB
 4 | 41	985936	2980	10845	12.6KB
 5 | 51	991523	1321	3188	3.3KB
 6 | 61	994202	644	1284	1.3KB
 7 | 71	995705	338	526	526B
 8 | 81	996636	145	206	206B
 9 | 91	997225	49	58	58B
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stoch.linkcounts.pe.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	943521	14739	985984	6.3MB
 3 | 31	973196	6553	132965	385.8KB
 4 | 41	985936	3010	26048	46KB
 5 | 51	991523	1385	7719	10.8KB
 6 | 61	994202	734	2760	3.3KB
 7 | 71	995705	441	1130	1.2KB
 8 | 81	996636	271	499	513B
 9 | 91	997225	173	238	238B
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.pe.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	944420	15846	521385	1.8MB
 3 | 31	973255	6824	82265	168.1KB
 4 | 41	985901	3030	14754	18.8KB
 5 | 51	991494	1365	4509	4.9KB
 6 | 61	994174	713	1857	1.9KB
 7 | 71	995658	419	830	833B
 8 | 81	995784	253	407	408B
 9 | 91	409278	18	26	26B
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherr.linkcounts.se.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	944420	15814	510453	1.7MB
 3 | 31	973255	6789	67188	123.2KB
 4 | 41	985901	3006	11976	14.1KB
 5 | 51	991494	1331	3386	3.5KB
 6 | 61	994174	647	1326	1.3KB
 7 | 71	995658	343	543	543B
 8 | 81	995784	150	215	215B
 9 | 91	409278	18	26	26B
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.se.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	943614	15479	506506	1.7MB
 3 | 31	973203	6767	68039	124.5KB
 4 | 41	985939	3014	12391	14.6KB
 5 | 51	991526	1335	3554	3.7KB
 6 | 61	994205	651	1408	1.4KB
 7 | 71	995708	346	571	571B
 8 | 81	996638	155	227	227B
 9 | 91	994901	47	58	58B
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/perfect.linkcounts.pe.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	943524	14739	1411791	9.9MB
 3 | 31	973199	6553	161398	490.4KB
 4 | 41	985939	3011	28853	51.7KB
 5 | 51	991526	1387	8211	11.6KB
 6 | 61	994205	739	2952	3.5KB
 7 | 71	995708	447	1199	1.2KB
 8 | 81	996639	278	537	556B
 9 | 91	997273	184	273	273B
10 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/stocherrcorr.linkcounts.pe.csv:
--------------------------------------------------------------------------------
 1 | K	n_graph_kmers	n_link_kmers	n_links	link_junction_mem
 2 | 21	943614	15502	523847	1.8MB
 3 | 31	973203	6792	72634	137KB
 4 | 41	985939	3036	15252	19.6KB
 5 | 51	991526	1375	5044	5.7KB
 6 | 61	994205	718	2037	2.1KB
 7 | 71	995708	423	849	851B
 8 | 81	996638	257	420	420B
 9 | 91	994901	152	201	201B
10 | 


--------------------------------------------------------------------------------
/tests/pop_bubbles/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Bubble popping tests
 4 | # One sub-directory per test case; each is run with its own make
 5 | 
 6 | all:
 7 | 	$(MAKE) -C pop_bubbles1
 8 | 	$(MAKE) -C pop_bubbles2
 9 | 	@echo "pop_bubbles: All looks good."
10 | 
11 | clean:
12 | 	$(MAKE) -C pop_bubbles1 clean
13 | 	$(MAKE) -C pop_bubbles2 clean
14 | 
15 | .PHONY: all clean
16 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/assembly/get-max-covg.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # usage: get-max-covg.sh <genome-size> <in.fa>
 4 | # output: <number_of_contigs> <sum_of_lengths> <longest_len> <shortest_len>
 5 | 
 6 | genome=$1
 7 | seqfile=$2
 8 | 
 9 | dnacat -L "$seqfile" | cut -f2 | sort -rn | \
10 |   awk '{if(x+$1>'$genome'){exit;} x+=$1; n+=1; l=$1; if(!f){f=$1}} END{print n,x,f,l}'
11 | 


--------------------------------------------------------------------------------
/results/data/ecoli/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | SHELL:=/bin/bash -euo pipefail
 3 | 
 4 | CTXDIR=../../../
 5 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
 6 | 
 7 | all: ecoli.fa
 8 | 
 9 | # Download the FASTA record from NCBI.
10 | # The '$' of the URL's 'log$' parameter is written '$$': a lone '$' followed
11 | # by '=' is expanded by make as an (empty) variable and silently dropped.
12 | # --fail stops an HTTP error page from being saved as if it were the sequence.
13 | NC_000913.fna:
14 | 	curl --fail -o NC_000913.fna 'https://www.ncbi.nlm.nih.gov/nuccore/U00096.3?report=fasta&log$$=seqview&format=text'
15 | 
16 | # <(...) is process substitution and needs bash (see SHELL above).
17 | # NOTE(review): -M presumably renames the sequence using the given list of
18 | # names -- confirm against dnacat's usage text.
19 | ecoli.fa: NC_000913.fna
20 | 	$(DNACAT) -M <(echo EColiK12) $< > $@
21 | 
22 | .PHONY: all
23 | 


--------------------------------------------------------------------------------
/results/data/chr22/uniq_flanks/README.txt:
--------------------------------------------------------------------------------
 1 | Isaac Turner
 2 | 2014 Sep 24
 3 | 
 4 | chr22.1Mbp.uniq.fa was generated by taking hg19 chr22:17,000,000-17,999,999 and
 5 | adding unique kmers (k=63) to the start and end.
 6 | 
 7 | Unique kmers added were:
 8 | GGTCGCACACAAATACTACGGGCATTGGATGCGACCATAAGTCTTGACAGGCTTTGTTCCCT
 9 | ACAACTTAACCTGGAACTAGAACTAATTTATGAGCGAGCCAGAACAGGTAGTCTGAGGGAGT
10 | 
11 | Regenerate with: `make`
12 | 


--------------------------------------------------------------------------------
/results/benchmark/10diploid10X/run-sim.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Ten diploid 1Kb genomes 20X covg
 4 | #
 5 | # To run:
 6 | #   ./run-sim.sh
 7 | #
 8 | # To clear up:
 9 | #   ./run-sim.sh clean
10 | #
11 | # All arguments are handed on to make after the settings below. "$@" is quoted
12 | # so that an argument containing spaces reaches make as one argument.
13 | #
14 | 
15 | make -f ../calling-comparison.mk \
16 |   SEQ=smaller.fa NUM_INDIVS=10 PLOIDY=2 KMER=31 \
17 |   SNPS=10 INDELS=10 INV=10 INVLEN=10 \
18 |   READLEN=100 MPSIZE=250 ALLELECOVG=10 \
19 |   MEMWIDTH=20 MEMHEIGHT=15 "$@"
20 | 


--------------------------------------------------------------------------------
/results/hash_table_benchmark/results20150409thurs.mac.txt:
--------------------------------------------------------------------------------
 1 | Reading: results20150409thurs.mac.csv 
 2 | Rows: 35 
 3 | 1 [1:5] mean: 57.9732 stddev: 2.082858
 4 | 2 [6:10] mean: 29.659 stddev: 3.398727
 5 | 3 [11:15] mean: 27.3986 stddev: 0.5796527
 6 | 4 [16:20] mean: 34.1938 stddev: 14.46233
 7 | 5 [21:25] mean: 40.0266 stddev: 0.5128282
 8 | 6 [26:30] mean: 24.392 stddev: 0.74178
 9 | 7 [31:35] mean: 25.5838 stddev: 1.673209
10 | 


--------------------------------------------------------------------------------
/libs/misc/mem_size.h:
--------------------------------------------------------------------------------
 1 | #ifndef MEM_SIZE_H_
 2 | #define MEM_SIZE_H_
 3 | 
 4 | #include <stddef.h> /* size_t, so this header can be included on its own */
 5 | 
 6 | /**
 7 |  * Returns the size of physical memory (RAM) in bytes.
 8 |  * Author:  David Robert Nadeau
 9 |  * Site:    http://NadeauSoftware.com/
10 |  * License: Creative Commons Attribution 3.0 Unported License
11 |  *          http://creativecommons.org/licenses/by/3.0/deed.en_US
12 |  */
13 | size_t getMemorySize(void);
14 | 
15 | #endif /* MEM_SIZE_H_ */
16 | 


--------------------------------------------------------------------------------
/tests/build/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # One sub-directory per test case; each is run with its own make
 4 | 
 5 | # build0: random sequence, sort graph, reassemble sequence
 6 | # build1: test --intersection and --graph arguments
 7 | 
 8 | all:
 9 | 	$(MAKE) -C build0
10 | 	$(MAKE) -C build1
11 | 	@echo "All looks good."
12 | 
13 | clean:
14 | 	$(MAKE) -C build0 clean
15 | 	$(MAKE) -C build1 clean
16 | 
17 | .PHONY: all clean
18 | 


--------------------------------------------------------------------------------
/scripts/R/install-deps.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | #
 4 | # Install all R packages required by McCortex R scripts
 5 | #
 6 | # The shebang carries no options: Linux hands everything after the interpreter
 7 | # path to env as ONE argument, so "Rscript --vanilla" would be looked up as a
 8 | # single program name and fail. For a vanilla run use:
 9 | #   Rscript --vanilla install-deps.R
10 | #
11 | 
12 | # Install `pkg` (a package name given as a string), together with its
13 | # dependencies, unless it can already be loaded
14 | getpkg <- function(pkg) {
15 |   if(!require(pkg, character.only=TRUE)) {
16 |     # https so that packages are not fetched over an unencrypted connection
17 |     install.packages(pkg, dependencies=TRUE, repos='https://cran.rstudio.com/')
18 |   }
19 | }
20 | 
21 | getpkg('ggplot2')
22 | getpkg('gridExtra')
23 | getpkg('reshape')
24 | getpkg('scales')
25 | getpkg('plyr')
26 | getpkg('cowplot')
27 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls3/hardy.fa:
--------------------------------------------------------------------------------
1 | >hardy1
2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
3 | >hardy2
4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
5 | 


--------------------------------------------------------------------------------
/results/correct/hg_chr22/README.txt:
--------------------------------------------------------------------------------
 1 | Isaac Turner
 2 | 2014-09-25
 3 | 
 4 | Using 1Mb of chr22 and empirical PhiX Illumina reads to simulate and measure
 5 | the power and error rate of read correction using a de Bruijn graph
 6 | 
 7 | Requires mccortex/results/data directory
 8 | 
 9 | To reproduce:
10 | 
11 |   cd libs && make core common && cd ..
12 |   make MAXK=31
13 |   cd results/correct
14 |   make
15 | 
16 | Runtime on my macbook is ~10 minutes
17 | 


--------------------------------------------------------------------------------
/results/README.txt:
--------------------------------------------------------------------------------
 1 | Data and code to reproduce experiments
 2 | 
 3 | In order to run any of these experiments, you must run the following commands to
 4 | compile McCortex and download the required data:
 5 | 
 6 |     cd ..
 7 |     for k in 31 63 95 127; do make MAXK=$k; done
 8 |     cd results/data
 9 |     ./download.sh
10 | 
11 | You must also fetch and download the PhiX data yourself from Illumina's
12 | basespace. Details are in results/data/PhiX/about.txt
13 | 


--------------------------------------------------------------------------------
/results/contig_confidence/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | all: confidence_plot.pdf confidence.pdf
 4 | 
 5 | # The R script is given the output file name after --args
 6 | confidence_plot.pdf: plot-confidence.R
 7 | 	R --vanilla -f $< --args $@
 8 | 
 9 | # .force is phony, so this target is rebuilt on every run
10 | confidence.pdf: confidence.tex confidence_plot.pdf .force
11 | 	pdflatex confidence
12 | 	pdflatex confidence
13 | 
14 | clean:
15 | 	rm -rf confidence_plot.pdf confidence.pdf confidence.aux confidence.log
16 | 
17 | .force:
18 | 
19 | .PHONY: all clean .force
20 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls3/laurel.fa:
--------------------------------------------------------------------------------
1 | >laurel1
2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
3 | >laurel2 85G>A
4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCaTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
5 | 


--------------------------------------------------------------------------------
/results/benchmark/minidiploid/run-sim.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Single small diploid 1Kb 20X covg
 4 | #
 5 | # To run:
 6 | #   ./run-sim.sh
 7 | #
 8 | # To clear up:
 9 | #   ./run-sim.sh clean
10 | #
11 | 
12 | # Write the sequence to simulate from into rnd.fa
13 | # (seqrnd/facat are external tools; presumably a random 1000bp sequence)
14 | { echo '>rnd'; seqrnd 1000; } | facat > rnd.fa
15 | 
16 | # Extra command line arguments (e.g. `clean`) are forwarded to make.
17 | # "$@" keeps each argument intact even if it contains spaces.
18 | make -f ../calling-comparison.mk \
19 |   SEQ=rnd.fa NUM_INDIVS=1 PLOIDY=2 KMER=31 \
20 |   SNPS=0 INDELS=100 INV=0 INVLEN=10 \
21 |   READLEN=100 MPSIZE=250 ALLELECOVG=10 \
22 |   MEMWIDTH=20 MEMHEIGHT=15 "$@"
23 | 


--------------------------------------------------------------------------------
/tests/clean_graph/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Test graph cleaning
 4 | # Each test case is in a separate sub-directory
 5 | 
 6 | all:
 7 | 	cd clean1 && $(MAKE)
 8 | 	cd clean2 && $(MAKE)
 9 | 	cd clean3 && $(MAKE)
10 | 	cd clean4 && $(MAKE)
11 | 	@echo "clean_graph: All looks good."
12 | 
13 | clean:
14 | 	cd clean1 && $(MAKE) clean
15 | 	cd clean2 && $(MAKE) clean
16 | 	cd clean3 && $(MAKE) clean
17 | 	cd clean4 && $(MAKE) clean
18 | 
19 | .PHONY: all clean
20 | 


--------------------------------------------------------------------------------
/travis/script.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | echo "Branch: ${TRAVIS_BRANCH}"
 6 | echo "OS: ${TRAVIS_OS_NAME}"
 7 | echo "CC: ${CC}"
 8 | echo "Perl: ${TRAVIS_PERL_VERSION}"
 9 | 
10 | # The COVERITY_SCAN_BRANCH environment variable will be set to 1 when the
11 | # Coverity Scan addon is in operation
12 | # Only run if we are not doing Coverity Scan analysis
13 | if [ "${COVERITY_SCAN_BRANCH}" != 1 ]
14 | then
15 |   # Build and run all tests
16 |   cd tests && ./run.sh
17 | fi
18 | 


--------------------------------------------------------------------------------
/results/hash_table_benchmark/stats.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript --vanilla
 2 | 
 3 | # Print the mean and standard deviation of column 1 for each consecutive
 4 | # group of five rows of a header-less CSV file.
 5 | 
 6 | args <- commandArgs(trailingOnly = TRUE)
 7 | 
 8 | if(length(args) != 1) {
 9 |   stop("Usage: stats.R <times.csv>");
10 | }
11 | 
12 | cat('Reading:',args[1],'\n');
13 | 
14 | x=read.csv(file=args[1],as.is=TRUE,header=FALSE)
15 | 
16 | cat('Rows:',nrow(x),'\n');
17 | 
18 | i=1
19 | j=1
20 | 
21 | while(i <= nrow(x)) {
22 |   # Clamp the group end to the last row so that a trailing partial group is
23 |   # summarised instead of indexing past the data (which yields NA)
24 |   last=min(i+4,nrow(x))
25 |   cat(j,' [',i,':',last,'] mean: ',mean(x[i:last,1]),' stddev: ',sd(x[i:last,1]),'\n',sep='');
26 |   i=i+5; j=j+1;
27 | }
28 | 


--------------------------------------------------------------------------------
/src/basic/common_buffers.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMON_BUFFERS_H_
 2 | #define COMMON_BUFFERS_H_
 3 | 
 4 | #include "madcrowlib/madcrow_buffer.h"
 5 | 
 6 | madcrow_buffer(char_ptr_buf, CharPtrBuffer, char*);
 7 | madcrow_buffer(byte_buf,     ByteBuffer,    uint8_t);
 8 | madcrow_buffer(uint32_buf,   Uint32Buffer,  uint32_t);
 9 | madcrow_buffer(int32_buf,    Int32Buffer,   int32_t);
10 | madcrow_buffer(size_buf,     SizeBuffer,    size_t);
11 | madcrow_buffer_wipe(zsize_buf, ZeroSizeBuffer, size_t);
12 | 
13 | #endif /* COMMON_BUFFERS_H_ */
14 | 


--------------------------------------------------------------------------------
/results/traversal_conjecture/README.txt:
--------------------------------------------------------------------------------
 1 | Isaac Turner
 2 | 2014 Sep 24
 3 | 
 4 | In order to re-run the experiment, from the mccortex directory run:
 5 | 
 6 |     # Fetch libraries needed
 7 |     cd libs && make core common && cd ..
 8 |     # Compile McCortex
 9 |     make MAXK=31
10 |     # Generate the reference from chr22
11 |     cd results/data/chr22/uniq_flanks && make && cd ../../../..
12 |     # Run the experiment
13 |     cd results/exp_abc/1MbpHg19
14 |     make
15 | 
16 | That's it!
17 | 
18 | On my macbook run time is 40 mins
19 | 


--------------------------------------------------------------------------------
/results/data/chr22/about.txt:
--------------------------------------------------------------------------------
 1 | Isaac Turner
 2 | 2014-10-01
 3 | 
 4 | 1Mbp of chr22 from hg19 (GRCh37)
 5 | 
 6 | To generate:
 7 | 
 8 |     wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/chr22.fa.gz
 9 |     gzip -d chr22.fa.gz
10 |     samtools faidx chr22.fa chr22:17000000-17999999 | tr -d 'n' | tr -d 'N' > chr22_17M_18M.fa
11 | 
12 | Check it is 1Mbp still
13 | 
14 |     dnacat -s chr22_17M_18M.fa
15 | 
16 | Index with BWA and samtools
17 | 
18 |     bwa index chr22_17M_18M.fa
19 |     samtools faidx chr22_17M_18M.fa
20 | 
21 | 


--------------------------------------------------------------------------------
/tests/lossless/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Each test case is in a separate sub-directory
 4 | 
 5 | # We show that a sequence can be losslessly regenerated from its linked dBG
 6 | # lossless0: random sequence in a single read is regenerated
 7 | # lossless1: minimum required read length to reproduce exact contig
 8 | 
 9 | all:
10 | 	cd lossless0 && $(MAKE)
11 | 	cd lossless1 && $(MAKE)
12 | 	@echo "All looks good."
13 | 
14 | clean:
15 | 	cd lossless0 && $(MAKE) clean
16 | 	cd lossless1 && $(MAKE) clean
17 | 
18 | .PHONY: all clean
19 | 


--------------------------------------------------------------------------------
/libs/misc/Makefile:
--------------------------------------------------------------------------------
 1 | # These files are all compiled by the main Makefile,
 2 | # this is just here as documentation
 3 | 
 4 | PLATFORM := $(shell uname)
 5 | COMPILER := $(shell ($(CC) -v 2>&1) | tr A-Z a-z )
 6 | 
 7 | # clang Link Time Optimisation (lto) seems to have issues atm
 8 | ifneq (,$(findstring clang,$(COMPILER)))
 9 | 	OPT = -O3
10 | else
11 | 	OPT = -O4
12 | endif
13 | 
14 | CFLAGS=-Wall -Wextra $(OPT)
15 | 
16 | all: city.o mem_size.o
17 | 
18 | %.o: %.c %.h
19 | 	$(CC) $(CFLAGS) -c $*.c -o $*.o
20 | 
21 | clean:
22 | 	rm -rf *.o
23 | 
24 | .PHONY: all clean
25 | 


--------------------------------------------------------------------------------
/tests/breakpoint/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Each test case is in a separate sub-directory
 4 | 
 5 | # breakpoint0: empty breakpoint calls (K=31)
 6 | # breakpoint1: test we get the correct VCF (K=11)
 7 | # breakpoint2: check we don't call any ref bubbles
 8 | 
 9 | all:
10 | 	cd breakpoint0 && $(MAKE)
11 | 	cd breakpoint1 && $(MAKE)
12 | 	cd breakpoint2 && $(MAKE)
13 | 	@echo "All looks good."
14 | 
15 | clean:
16 | 	cd breakpoint0 && $(MAKE) clean
17 | 	cd breakpoint1 && $(MAKE) clean
18 | 	cd breakpoint2 && $(MAKE) clean
19 | 
20 | .PHONY: all clean
21 | 


--------------------------------------------------------------------------------
/tests/bubbles/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Test bubble calling and decomposing to VCF
 4 | # Each test case is in a separate sub-directory
 5 | 
 6 | all:
 7 | 	cd bubbles1 && $(MAKE)
 8 | 	cd bubbles2 && $(MAKE)
 9 | 	cd bubbles3 && $(MAKE)
10 | 	cd bubbles4 && $(MAKE)
11 | 	cd bubbles5 && $(MAKE)
12 | 	@echo "bubbles: All looks good."
13 | 
14 | clean:
15 | 	cd bubbles1 && $(MAKE) clean
16 | 	cd bubbles2 && $(MAKE) clean
17 | 	cd bubbles3 && $(MAKE) clean
18 | 	cd bubbles4 && $(MAKE) clean
19 | 	cd bubbles5 && $(MAKE) clean
20 | 
21 | .PHONY: all clean
22 | 


--------------------------------------------------------------------------------
/scripts/analysis/mummer2vcf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | 
 5 | snpfile=$1
 6 | reffile=$2
 7 | 
 8 | echo '##fileformat=VCFv4.1'
 9 | echo '##fileDate='`date '+%Y%m%d'`
10 | echo "##reference=$reffile"
11 | ~/c/dnacat/bin/dnacat -L $reffile | awk '{OFS=""; print "##chrom=<ID=",$1,",length=",$2,">"}'
12 | echo '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'
13 | echo | awk '{OFS="\t"; print "#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT"}'
14 | 
15 | grep -E '^\s*[0-9]' $snpfile | awk '{OFS="\t"; print $14,$1,".",$2,$3,".",".",".","GT"}'
16 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls4/calls.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20151014
 4 | ##reference=ref/ref.fa
 5 | ##contig=<ID=ref,length=200>
 6 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 7 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
 8 | ref	7	.	G	T	.	PASS	.	.
 9 | ref	28	.	A	C	.	PASS	.	.
10 | ref	49	.	GG	CGAT	.	PASS	.	.
11 | ref	71	.	A	GAT	.	PASS	.	.
12 | ref	92	.	GCACCAGGG	AC	.	PASS	.	.
13 | ref	121	.	C	G	.	PASS	.	.
14 | ref	142	.	GGCACAGCA	TTACTCTTC	.	PASS	.	.
15 | ref	171	.	A	CTAG	.	PASS	.	.
16 | ref	192	.	CATCATAG	A	.	PASS	.	.
17 | 


--------------------------------------------------------------------------------
/results/benchmark/diploid60X/run-sim.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Single diploid 1Mb genome 60X covg
 4 | #
 5 | # To run:
 6 | #   cd dir/this/is/in
 7 | #   ./run-sim.sh
 8 | #
 9 | # To clear up:
10 | #   cd dir/this/is/in
11 | #   ./run-sim.sh clean
12 | #
13 | 
14 | make -f ../calling-comparison.mk \
15 |   SEQ=../chr21.1Mb.fa NUM_INDIVS=1 PLOIDY=2 KMER=31 \
16 |   SNPS=15000 INDELS=7500 INV=750 INVLEN=100 \
17 |   READLEN=100 MPSIZE=250 ALLELECOVG=30 \
18 |   MEMWIDTH=20 MEMHEIGHT=20 MAPARGS='--substitutionrate=0.01 ' "$@"
19 | 
20 | # sites=0.01*sum(1/1)*L=0.01*1M = 10,000
21 | # sites=0.01*sum(1/1+1/2)*L = 15,000
22 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/notes.txt:
--------------------------------------------------------------------------------
 1 | At k=21, ~0.27%[1] of mutations add a new edge between existing kmers.
 2 | With an error rate of 0.5% and 100X coverage, there are 500,000 seqn errors[2].
 3 | Therefore we add approximately 1325 new edges[3] to the graph with errors, even
 4 | if we clean off all kmers due to sequencing errors. This equates to an edge
 5 | roughly every 750bp.
 6 | 
 7 | 
 8 | [1] 7951 / (3*10**6), see seqn.errors.csv
 9 | [2] 100*10**6*0.005, 100X coverage, ref is 1Mbp, 0.5% sequencing error rate
10 | [3] 500000*7951/(3*10**6), 500,000 errors, ~0.27% are likely to be edges between existing kmers
11 | 


--------------------------------------------------------------------------------
/scripts/perl/LineReader.pm:
--------------------------------------------------------------------------------
 1 | package LineReader;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use Carp;
 6 | 
 7 | # Line reader over a filehandle that supports pushing lines back.
 8 | 
 9 | # new($fh, $path): wrap a filehandle; $path is stored but not used here.
10 | sub new
11 | {
12 |   my $class = shift;
13 |   my ($fh, $path) = @_;
14 | 
15 |   return bless {
16 |     _fh   => $fh,
17 |     _path => $path,
18 |     _next => []     # pushed-back lines, first element is returned first
19 |   }, $class;
20 | }
21 | 
22 | # Return the first pushed-back line if it is defined,
23 | # otherwise read from the filehandle.
24 | sub read_line
25 | {
26 |   my $self = shift;
27 |   my $pushed_back = shift @{ $self->{_next} };
28 |   return $pushed_back if defined $pushed_back;
29 |   my $fh = $self->{_fh};
30 |   return <$fh>;
31 | }
32 | 
33 | # Push $line onto the front of the pushed-back queue.
34 | sub unread_line
35 | {
36 |   my $self = shift;
37 |   my ($line) = @_;
38 |   unshift @{ $self->{_next} }, $line;
39 | }
40 | 
41 | 1;


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/cleaning.corr.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal
 4 | k21,912362,1115087,912361,912624,912361
 5 | k31,962865,1438774,962864,962835,962835
 6 | k41,984331,1635697,984330,984322,984322
 7 | k51,992751,1697232,992749,992735,992735
 8 | k61,996024,1674397,996022,996008,996008
 9 | k71,997500,1587464,997498,997498,997498
10 | k81,998290,1441765,998288,998287,998287
11 | k91,998785,1242238,998704,995357,995352
12 | k99,999043,907324,854979,576,576
13 | 


--------------------------------------------------------------------------------
/results/var_calling_10ecoli/results/20150510_sun_initrun/20150510.sun.txt.4:
--------------------------------------------------------------------------------
1 | make -f task.k21.mk CTXDIR=../.. MEM=2G breakpointvcf
2 | make[1]: Entering directory `/data1/users/turner/cortex_sims/ninja-cortex/results/bubble_calling_10ecoli'
3 | make[1]: Nothing to be done for `breakpointvcf'.
4 | make[1]: Leaving directory `/data1/users/turner/cortex_sims/ninja-cortex/results/bubble_calling_10ecoli'
5 | ../../libs/bcftools/bcftools isec truth.k21.norm.vcf.gz proj/vcfs/breakpoints.k21.vcf.gz -p truthisec
6 | McCortex-brkpt Missed: 1557 / 45789 ( 3.40%)
7 | McCortex-brkpt FP:      238 / 44470 ( 0.54%)
8 | McCortex-brkpt Found:  44232 / 45789 (96.60%)
9 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/cleaning.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal
 4 | k21,912362,8064108,912361,916474,912340
 5 | k31,962865,10437776,962864,963212,962853
 6 | k41,984331,11684532,984330,984311,984310
 7 | k51,992751,11939641,992749,992711,992711
 8 | k61,996024,11301966,996022,996009,996009
 9 | k71,997500,9841194,997498,997436,997436
10 | k81,998290,7606657,998286,997359,997359
11 | k91,998785,4640344,996949,406297,406277
12 | k99,999043,1484663,703944,2269,2266
13 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/cleaning.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal
 4 | k21,912362,8064108,912361,916474,912340
 5 | k31,962865,10437776,962864,963212,962853
 6 | k41,984331,11684532,984330,984311,984310
 7 | k51,992751,11939641,992749,992711,992711
 8 | k61,996024,11301966,996022,996009,996009
 9 | k71,997500,9841194,997498,997436,997436
10 | k81,998290,7606657,998286,997359,997359
11 | k91,998785,4640344,996949,406297,406277
12 | k99,999043,1484663,703944,2269,2266
13 | 


--------------------------------------------------------------------------------
/libs/misc/jenkins.h:
--------------------------------------------------------------------------------
 1 | #ifndef JENKINS_H_
 2 | #define JENKINS_H_
 3 | 
 4 | #include <stddef.h> // size_t
 5 | #include <stdint.h> // uint8_t, uint32_t
 6 | 
 7 | // Mix one byte `x` into the running hash state `h`
 8 | // 5 ops per byte
 9 | static inline uint32_t jenkins_mix(uint32_t h, uint8_t x) {
10 |   h += x; h += (h<<10); h ^= (h>>6); return h;
11 | }
12 | 
13 | // Final mixing step, applied once after all bytes have been mixed in
14 | static inline uint32_t jenkins_finish(uint32_t h) {
15 |   h += (h<<3); h ^= (h>>11); h += (h<<15); return h;
16 | }
17 | 
18 | // Hash `len` bytes starting at `key`; returns 0 when len is 0
19 | // 5*bytes+6 ops [32bit => 26, 64 => 46]
20 | static inline uint32_t jenkins_one_at_a_time_hash(const uint8_t *key, size_t len)
21 | {
22 |   uint32_t hash = 0;
23 |   size_t i; // same type as len: a 32 bit index wraps before reaching len > UINT32_MAX
24 |   for(i = 0; i < len; ++i) hash = jenkins_mix(hash, key[i]);
25 |   return jenkins_finish(hash);
26 | }
27 | 
28 | #endif /* JENKINS_H_ */
29 | 


--------------------------------------------------------------------------------
/tests/threading/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Test read threading through the graph
 4 | # Each test case is in a separate sub-directory
 5 | # threading1:
 6 | # threading2: paired-end threading
 7 | # threading3: paired-end threading with short reads
 8 | # threading4:
 9 | 
10 | all:
11 | 	cd threading1 && $(MAKE)
12 | 	cd threading2 && $(MAKE)
13 | 	cd threading3 && $(MAKE)
14 | 	cd threading4 && $(MAKE)
15 | 	@echo "threading: All looks good."
16 | 
17 | clean:
18 | 	cd threading1 && $(MAKE) clean
19 | 	cd threading2 && $(MAKE) clean
20 | 	cd threading3 && $(MAKE) clean
21 | 	cd threading4 && $(MAKE) clean
22 | 
23 | .PHONY: all clean
24 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160912mon/seqn.errors.csv:
--------------------------------------------------------------------------------
 1 | # The number of sequencing errors that would add a new edge between two
 2 | # existing kmers. Note: there are 3*reflen possible mutations
 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges
 4 | 21,1000000,927610,933930,7951,100,0.005,1325
 5 | 31,1000000,971394,973934,1874,100,0.005,312
 6 | 41,1000000,988492,989443,394,100,0.005,65
 7 | 51,1000000,994492,994828,85,100,0.005,14
 8 | 61,1000000,996793,996939,35,100,0.005,5
 9 | 71,1000000,997897,997975,9,100,0.005,1
10 | 81,1000000,998506,998551,4,100,0.005,0
11 | 91,1000000,998891,998921,6,100,0.005,1
12 | 99,1000000,999092,999114,0,100,0.005,0
13 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20160929thurs/bad.edges.csv:
--------------------------------------------------------------------------------
 1 | # The number of sequencing errors that would add a new edge between two
 2 | # existing kmers. Note: there are 3*reflen possible mutations
 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges
 4 | 21,1000000,927610,933930,7951,100,0.005,1325
 5 | 31,1000000,971394,973934,1874,100,0.005,312
 6 | 41,1000000,988492,989443,394,100,0.005,65
 7 | 51,1000000,994492,994828,85,100,0.005,14
 8 | 61,1000000,996793,996939,35,100,0.005,5
 9 | 71,1000000,997897,997975,9,100,0.005,1
10 | 81,1000000,998506,998551,4,100,0.005,0
11 | 91,1000000,998891,998921,6,100,0.005,1
12 | 99,1000000,999092,999114,0,100,0.005,0
13 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20161012wed/bad.edges.csv:
--------------------------------------------------------------------------------
 1 | # The number of sequencing errors that would add a new edge between two
 2 | # existing kmers. Note: there are 3*reflen possible mutations
 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges
 4 | 21,1000000,927610,933930,7951,100,0.005,1325
 5 | 31,1000000,971394,973934,1874,100,0.005,312
 6 | 41,1000000,988492,989443,394,100,0.005,65
 7 | 51,1000000,994492,994828,85,100,0.005,14
 8 | 61,1000000,996793,996939,35,100,0.005,5
 9 | 71,1000000,997897,997975,9,100,0.005,1
10 | 81,1000000,998506,998551,4,100,0.005,0
11 | 91,1000000,998891,998921,6,100,0.005,1
12 | 99,1000000,999092,999114,0,100,0.005,0
13 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/freebayes/analysis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | set -o xtrace
 5 | 
 6 | CTXDIR=../../../../
 7 | 
 8 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz
 9 | MUMMER=../mummer/mummer.vcf.gz
10 | TRUTH=../truth/CAV1016.fa
11 | MAPPING_TEST=$CTXDIR/scripts/analysis/mapping-vars-test.sh
12 | MUMMER_ISEC=$CTXDIR/scripts/analysis/mummer-vcf-isec.sh
13 | 
14 | vcf=freebayes.vcf.gz
15 | name=freebayes
16 | 
17 | mkdir -p mapping_truth mummer_isec
18 | 
19 | echo "Mapping test..."
20 | $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name
21 | echo "Mummer intersection..."
22 | $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log
23 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/platypus/analysis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | set -o xtrace
 5 | 
 6 | CTXDIR=../../../../
 7 | 
 8 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz
 9 | MUMMER=../mummer/mummer.vcf.gz
10 | TRUTH=../truth/CAV1016.fa
11 | MAPPING_TEST=$CTXDIR/scripts/analysis/mapping-vars-test.sh
12 | MUMMER_ISEC=$CTXDIR/scripts/analysis/mummer-vcf-isec.sh
13 | 
14 | vcf=platypus.vcf.gz
15 | name=platypus
16 | 
17 | mkdir -p mapping_truth mummer_isec
18 | 
19 | echo "Mapping test..."
20 | $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name
21 | echo "Mummer intersection..."
22 | $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log
23 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/bad.edges.csv:
--------------------------------------------------------------------------------
 1 | # The number of sequencing errors that would add a new edge between two
 2 | # existing kmers. Note: there are 3*reflen possible mutations
 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges
 4 | 21,1000000,955450,959133,3627,100,0.005,604
 5 | 31,1000000,980343,981744,838,100,0.005,139
 6 | 41,1000000,990014,990588,183,100,0.005,30
 7 | 51,1000000,993938,994179,44,100,0.005,7
 8 | 61,1000000,995750,995879,15,100,0.005,2
 9 | 71,1000000,996793,996870,7,100,0.005,1
10 | 81,1000000,997395,997442,2,100,0.005,0
11 | 91,1000000,997789,997822,5,100,0.005,0
12 | 99,1000000,998029,998055,4,100,0.005,0
13 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/cortex/analysis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | set -o xtrace
 5 | 
 6 | CTXDIR=../../../../
 7 | 
 8 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz
 9 | MUMMER=../mummer/mummer.vcf.gz
10 | TRUTH=../truth/CAV1016.fa
11 | MAPPING_TEST=$CTXDIR/scripts/analysis/mapping-vars-test.sh
12 | MUMMER_ISEC=$CTXDIR/scripts/analysis/mummer-vcf-isec.sh
13 | 
14 | vcf=cortex.k31.k61.vcf.gz
15 | name=cortex.k31.k61
16 | 
17 | mkdir -p mapping_truth mummer_isec
18 | 
19 | echo "Mapping test..."
20 | $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name
21 | echo "Mummer intersection..."
22 | $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log
23 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/bad.edges.csv:
--------------------------------------------------------------------------------
 1 | # The number of sequencing errors that would add a new edge between two
 2 | # existing kmers. Note: there are 3*reflen possible mutations
 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges
 4 | 21,1000000,955450,959133,3627,100,0.005,604
 5 | 31,1000000,980343,981744,838,100,0.005,139
 6 | 41,1000000,990014,990588,183,100,0.005,30
 7 | 51,1000000,993938,994179,44,100,0.005,7
 8 | 61,1000000,995750,995879,15,100,0.005,2
 9 | 71,1000000,996793,996870,7,100,0.005,1
10 | 81,1000000,997395,997442,2,100,0.005,0
11 | 91,1000000,997789,997822,5,100,0.005,0
12 | 99,1000000,998029,998055,4,100,0.005,0
13 | 


--------------------------------------------------------------------------------
/src/tools/infer_edges.h:
--------------------------------------------------------------------------------
 1 | #ifndef INFER_EDGES_H_
 2 | #define INFER_EDGES_H_
 3 | 
 4 | #include "cortex_types.h"
 5 | #include "db_graph.h"
 6 | 
 7 | // `pop_edges` if true, only add edges that are in at least one other colour
 8 | //  -> If two kmers are in a sample and the population has an edge between
 9 | //     them, add edge to sample.
10 | // Returns true if changed; false otherwise
11 | bool infer_kmer_edges(const BinaryKmer node_bkey, bool pop_edges,
12 |                       Edges *edges, const Covg *covgs,
13 |                       const dBGraph *db_graph);
14 | 
15 | size_t infer_edges(size_t nthreads, bool add_all_edges, const dBGraph *db_graph);
16 | 
17 | #endif /* INFER_EDGES_H_ */
18 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/bad.edges.csv:
--------------------------------------------------------------------------------
 1 | # The number of sequencing errors that would add a new edge between two
 2 | # existing kmers. Note: there are 3*reflen possible mutations
 3 | kmer,reflen,nkmers,nedges,nerror_edges,cov,err_rate,est_bad_edges
 4 | 21,1000000,955450,959133,3627,100,0.005,604
 5 | 31,1000000,980343,981744,838,100,0.005,139
 6 | 41,1000000,990014,990588,183,100,0.005,30
 7 | 51,1000000,993938,994179,44,100,0.005,7
 8 | 61,1000000,995750,995879,15,100,0.005,2
 9 | 71,1000000,996793,996870,7,100,0.005,1
10 | 81,1000000,997395,997442,2,100,0.005,0
11 | 91,1000000,997789,997822,5,100,0.005,0
12 | 99,1000000,998029,998055,4,100,0.005,0
13 | 


--------------------------------------------------------------------------------
/libs/cJSON/tests/test1:
--------------------------------------------------------------------------------
 1 | {
 2 |     "glossary": {
 3 |         "title": "example glossary",
 4 | 		"GlossDiv": {
 5 |             "title": "S",
 6 | 			"GlossList": {
 7 |                 "GlossEntry": {
 8 |                     "ID": "SGML",
 9 | 					"SortAs": "SGML",
10 | 					"GlossTerm": "Standard Generalized Markup Language",
11 | 					"Acronym": "SGML",
12 | 					"Abbrev": "ISO 8879:1986",
13 | 					"GlossDef": {
14 |                         "para": "A meta-markup language, used to create markup languages such as DocBook.",
15 | 						"GlossSeeAlso": ["GML", "XML"]
16 |                     },
17 | 					"GlossSee": "markup"
18 |                 }
19 |             }
20 |         }
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls5/calls.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20151014
 4 | ##reference=ref/ref.fa
 5 | ##contig=<ID=ref,length=200>
 6 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 7 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
 8 | ref	7	.	G	T	.	PASS	.	.
 9 | ref	15	.	GCAATACCCCCCGATGACGG	G	.	PASS	.	.
10 | ref	20	.	A	T	.	PASS	.	.
11 | ref	22	.	C	G	.	PASS	.	.
12 | ref	40	.	T	A	.	PASS	.	.
13 | ref	49	.	GG	CGAT	.	PASS	.	.
14 | ref	71	.	A	GAT	.	PASS	.	.
15 | ref	92	.	GCACCAGGG	AC	.	PASS	.	.
16 | ref	121	.	C	G	.	PASS	.	.
17 | ref	142	.	GGCACAGCA	TTACTCTTC	.	PASS	.	.
18 | ref	171	.	A	CTAG	.	PASS	.	.
19 | ref	192	.	CATCATAG	A	.	PASS	.	.
20 | 


--------------------------------------------------------------------------------
/scripts/calculations/c-within-rand-a-b-bounds.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | #
 7 | # Sample three random numbers, report how often
 8 | #  c >= min(a,b) and c <= max(a,b)
 9 | #
10 | 
11 | use List::Util qw(min max);
12 | 
13 | # Human genome
14 | my $genome_size = 3100000000;
15 | my $num_loops = 100000; # 100K
16 | 
17 | my $num_within = 0;
18 | 
19 | for(my $i = 0; $i < $num_loops; $i++) {
20 |   my ($pos_a,$pos_b,$pos_c) = map {int(rand($genome_size))} 0..2; # not $a/$b: those are sort()'s variables
21 |   if($pos_c >= min($pos_a,$pos_b) && $pos_c <= max($pos_a,$pos_b)) {
22 |     $num_within++;
23 |   }
24 | }
25 | 
26 | 
27 | my $percent = 100 * $num_within / $num_loops;
28 | print "$num_within / $num_loops (".sprintf("%.2f", $percent)."%)\n";
29 | 


--------------------------------------------------------------------------------
/src/tools/correct_reads.h:
--------------------------------------------------------------------------------
 1 | #ifndef CORRECT_READS_H_
 2 | #define CORRECT_READS_H_
 3 | 
 4 | #include "correct_aln_input.h"
 5 | #include "db_graph.h"
 6 | 
 7 | /**
 8 |  * Correct reads against the graph, and print out
 9 |  * @param fq_zero use to fill quality scores; defaults to '.' if zero
10 |  * @param append_orig_seq If true print out '>name orig=ORIGSEQ'
11 |  */
12 | void correct_reads(CorrectAlnInput *inputs, size_t num_inputs,
13 |                    const char *dump_seqgap_hist_path,
14 |                    const char *dump_fraglen_hist_path,
15 |                    char fq_zero, bool append_orig_seq,
16 |                    size_t num_threads, const dBGraph *db_graph);
17 | 
18 | #endif /* CORRECT_READS_H_ */
19 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls1/jane.fa:
--------------------------------------------------------------------------------
1 | >jane 1A>G 50G>A 199G>C
2 | gCTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGaCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAcG
3 | >jane 1A>C 50G>C 199G>A
4 | cCTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGcCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAaG
5 | >jane chr1
6 | TGGGCCAGTACGGTGAATCCCTGATGATCCGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG
7 | >jane chr1 30C>T
8 | TGGGCCAGTACGGTGAATCCCTGATGATCtGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG
9 | 


--------------------------------------------------------------------------------
/libs/cJSON/tests/test3:
--------------------------------------------------------------------------------
 1 | {"widget": {
 2 |     "debug": "on",
 3 |     "window": {
 4 |         "title": "Sample Konfabulator Widget",
 5 |         "name": "main_window",
 6 |         "width": 500,
 7 |         "height": 500
 8 |     },
 9 |     "image": { 
10 |         "src": "Images/Sun.png",
11 |         "name": "sun1",
12 |         "hOffset": 250,
13 |         "vOffset": 250,
14 |         "alignment": "center"
15 |     },
16 |     "text": {
17 |         "data": "Click Here",
18 |         "size": 36,
19 |         "style": "bold",
20 |         "name": "text1",
21 |         "hOffset": 250,
22 |         "vOffset": 100,
23 |         "alignment": "center",
24 |         "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;"
25 |     }
26 | }}    


--------------------------------------------------------------------------------
/scripts/report/make-kmer-plot.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -eou pipefail
 3 | 
 4 | if [[ $# -ne 3 ]]; then
 5 |   echo "usage: $0 <plot-covg-hist.R> <in.csv> <out.pdf>" 1>&2
 6 |   exit -1
 7 | fi
 8 | 
 9 | #in:  data/sample.kK.se.links.csv
10 | #out: plots/sample.kK.se.links.pdf
11 | script=$1
12 | in=$2
13 | out=$3
14 | 
15 | # Strip a trailing .raw.cov.csv / .clean.cov.csv (dots escaped: match literally)
16 | ROOT=$(echo "$in" | awk '{gsub(/\.(raw|clean)\.cov\.csv$/,"")}1')
17 | # Values are read from $ROOT.kthresh / $ROOT.kmercov if they exist, otherwise 0
18 | CUTOFF=$( ([[ -e "$ROOT.kthresh" ]] && cat "$ROOT.kthresh") || echo 0)
19 | KCOV=$( ([[ -e "$ROOT.kmercov" ]] && cat "$ROOT.kmercov") || echo 0)
20 | echo "in=$in"
21 | echo "out=$out"
22 | echo "cutoff_file=$ROOT.kthresh"
23 | echo "kcov_file=$ROOT.kmercov"
24 | echo "CUTOFF=$CUTOFF"
25 | echo "KCOV=$KCOV"
26 | 
27 | set -o xtrace
28 | $script "$in" "$out" $CUTOFF $KCOV
29 | 


--------------------------------------------------------------------------------
/src/basic/str_parsing.c:
--------------------------------------------------------------------------------
 1 | #include "global.h"
 2 | #include "str_parsing.h"
 3 | 
 4 | // Parse a comma separated list e.g. "12,3,12", appending each number to `nums`
 5 | // Returns <0 if a comma is not followed by a digit, otherwise number of chars
 6 | // used (0, with nothing added to `nums`, if `str` does not start with a digit)
 7 | int comma_list_to_array(const char *str, SizeBuffer *nums)
 8 | {
 9 |   const char *ptr = str;
10 |   char *end = NULL;
11 | 
12 |   // If no numbers success. Must be `||`: no char is both < '0' and > '9'
13 |   if(*ptr < '0' || *ptr > '9') return 0;
14 | 
15 |   while(1) {
16 |     size_t num = strtoul(ptr, &end, 10);
17 |     if(end == ptr) die("Cannot parse: '%s'", str); // no digits were consumed
18 |     size_buf_add(nums, num);
19 |     if(*end != ',') break;
20 |     ptr = end+1;
21 |     if(*ptr < '0' || *ptr > '9') return -1; // no number after comma!
22 |   }
23 | 
24 |   return end-str;
25 | }
26 | 
27 | 


--------------------------------------------------------------------------------
/scripts/perl/mccortex-header.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | # Use current directory to find modules
 7 | use FindBin;
 8 | use lib $FindBin::Bin;
 9 | 
10 | use McCortexScripts; # load_json_hdr()
11 | 
12 | sub print_usage
13 | {
14 |   for my $err (@_) { print STDERR "Error: $err\n"; }
15 |   print STDERR "" .
16 | "Usage $0 <file>
17 | 
18 |   Read only the JSON header from a file.
19 | 
20 | ";
21 | 
22 |   exit(-1);
23 | }
24 | 
25 | if(@ARGV != 1) { print_usage(); }
26 | my $path = shift(@ARGV);
27 | 
28 | use IO::Zlib;
29 | my $gz = new IO::Zlib;
30 | $gz->open($path, "rb") or die("Cannot open file: $path");
31 | 
32 | my $hdr = load_json_hdr($gz, $path);
33 | 
34 | print $hdr;
35 | 
36 | $gz->close();
37 | 


--------------------------------------------------------------------------------
/results/hash_table_benchmark/results20150409thurs.linux.txt:
--------------------------------------------------------------------------------
 1 | Reading: results20150409thurs.linux.csv 
 2 | Rows: 70 
 3 | 1 [1:5] mean: 42.402 stddev: 0.1351203
 4 | 2 [6:10] mean: 491.7282 stddev: 0.6633338
 5 | 3 [11:15] mean: 74.853 stddev: 4.378826
 6 | 4 [16:20] mean: 245.0712 stddev: 83.90267
 7 | 5 [21:25] mean: 73.7224 stddev: 16.67115
 8 | 6 [26:30] mean: 327.1884 stddev: 126.755
 9 | 7 [31:35] mean: 14.6294 stddev: 6.844972
10 | 8 [36:40] mean: 136.237 stddev: 45.24435
11 | 9 [41:45] mean: 19.4692 stddev: 2.524081
12 | 10 [46:50] mean: 216.0472 stddev: 16.80419
13 | 11 [51:55] mean: 41.7132 stddev: 3.203693
14 | 12 [56:60] mean: 325.4446 stddev: 78.24026
15 | 13 [61:65] mean: 50.8198 stddev: 11.77143
16 | 14 [66:70] mean: 404.2498 stddev: 113.7489
17 | 


--------------------------------------------------------------------------------
/tests/subgraph_unitigs/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | K=11
 4 | CTXDIR=../..
 5 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1])
 6 | 
 7 | all: subgraph0.k$(K).ctx subgraph1.k$(K).ctx
 8 | 
 9 | seed.fa:
10 | 	echo TGCCTAGAAGG > seed.fa
11 | 
12 | seq.fa:
13 | 	echo ATGGTGCCTAGAAGGTA > seq.fa
14 | 	echo cTGGTGCCTAGAAGGTg >> seq.fa
15 | 
16 | graph.k$(K).ctx: seq.fa
17 | 	$(MCCORTEX) build -q -m 1M -k $(K) --sample MsGraph --seq seq.fa graph.k$(K).ctx
18 | 
19 | subgraph%.k$(K).ctx: graph.k$(K).ctx seed.fa
20 | 	$(MCCORTEX) subgraph -q --seed seed.fa --unitigs --dist $* -o subgraph$*.k$(K).ctx graph.k$(K).ctx
21 | 
22 | clean:
23 | 	rm -rf subgraph*.k$(K).ctx graph.k$(K).ctx seed.fa seq.fa
24 | 
25 | .PHONY: all clean
26 | 


--------------------------------------------------------------------------------
/results/data/chr22/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | SHELL=/bin/bash -eou pipefail
 3 | 
 4 | CTXDIR=../../../
 5 | SAMTOOLS=$(CTXDIR)/libs/samtools/samtools
 6 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
 7 | 
 8 | all: chr22.fa chr22_17M_18M.fa chr22_28M_29M.fa
 9 | 
10 | chr22.fa:
11 | 	curl -O http://hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/chr22.fa.gz
12 | 	gzip -d chr22.fa.gz
13 | 
14 | chr22_17M_18M.fa: chr22.fa
15 | 	$(SAMTOOLS) faidx chr22.fa chr22:17000000-17999999 | \
16 | 	$(DNACAT) -M <(echo 'chr22_17M_18M') - > $@
17 | 	if $(DNACAT) -P $@ | grep -qi n; then false; fi
18 | 
19 | chr22_28M_29M.fa: chr22.fa
20 | 	$(SAMTOOLS) faidx chr22.fa chr22:28000000-28999999 | \
21 | 	$(DNACAT) -M <(echo 'chr22_28M_29M') - > $@
22 | 	if $(DNACAT) -P $@ | grep -qi n; then false; fi
23 | 


--------------------------------------------------------------------------------
/src/graph/graph_format.h:
--------------------------------------------------------------------------------
 1 | #ifndef GRAPH_FORMAT_H_
 2 | #define GRAPH_FORMAT_H_
 3 | 
 4 | // graph file format version
 5 | #define CTX_GRAPH_FILEFORMAT 6
 6 | 
 7 | #include "graph_info.h"
 8 | 
 9 | // Graph (.ctx)
10 | typedef struct
11 | {
12 |   uint32_t version, kmer_size, num_of_bitfields, num_of_cols;
13 |   GraphInfo *ginfo; // Cleaning info etc for each colour
14 |   size_t capacity;
15 | } GraphFileHeader;
16 | 
17 | void graph_header_capacity(GraphFileHeader *header, size_t num_of_cols);
18 | void graph_header_dealloc(GraphFileHeader *header);
19 | void graph_header_print(const GraphFileHeader *header);
20 | 
21 | static inline void graph_header_free(GraphFileHeader *hdr) {
22 |   graph_header_dealloc(hdr);
23 |   ctx_free(hdr);
24 | }
25 | 
26 | #endif /* GRAPH_FORMAT_H_ */
27 | 


--------------------------------------------------------------------------------
/tests/coverage/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | CTXDIR=../..
 4 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
 5 | MCCORTEX=$(CTXDIR)/bin/mccortex31
 6 | K=5
 7 | 
 8 | TGTS=seq.fa rnd.fa seq.k$(K).ctx coverage.txt
 9 | 
10 | all: $(TGTS)
11 | 
12 | clean:
13 | 	rm -rf $(TGTS)
14 | 
15 | %.fa:
16 | 	$(DNACAT) -F -n 50 > $@
17 | 
18 | seq.k$(K).ctx: seq.fa
19 | 	$(MCCORTEX) build -q -k $(K) --sample Wallace \
20 | 	                             --sample Gromit --seq seq.fa \
21 | 	                             --sample Trousers --seq seq.fa --seq2 seq.fa:seq.fa $@
22 | 	$(MCCORTEX) view -qk $@
23 | 
24 | coverage.txt: seq.k$(K).ctx rnd.fa
25 | 	$(MCCORTEX) coverage -q --seq rnd.fa -1 seq.fa seq.k$(K).ctx > coverage.txt
26 | 	cat coverage.txt
27 | 
28 | .PHONY: all clean
29 | 


--------------------------------------------------------------------------------
/results/contig_confidence/confidence.tex:
--------------------------------------------------------------------------------
 1 | \documentclass{article}
 2 | \title{Assembly confidence}
 3 | 
 4 | \usepackage{mathtools}
 5 | \usepackage{graphicx}
 6 | \usepackage{subcaption}
 7 | 
 8 | \begin{document}
 9 | 
10 | \section{Assembly Confidence}
11 | 
12 | % TODO Description
13 | 
14 | \begin{figure}[ht]
15 | \centering
16 | \includegraphics[width=5in]{confidence_plot.pdf}
17 | \caption{Confidence values for various read lengths and coverages}
18 | \label{fig:contig_conf_values_plot}
19 | \end{figure}
20 | 
21 | Shown in Figure \ref{fig:contig_conf_values_plot}.
22 | 
23 | \begin{gather*}
24 | \lambda = Coverage / ReadLength \\
25 | R_k = ReadLength - JunctionDistance + 1 \\
26 | confidence = \left(1 - e^{-\lambda R_k}\right) e^{-\lambda e^{-\lambda R_k}}
27 | \end{gather*}
28 | 
29 | 
30 | \end{document}
31 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls1/calls.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.1
 2 | ##fileDate=20151014
 3 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 4 | ##reference=ref/ref.fa
 5 | ##contig=<ID=ref,length=200>
 6 | ##contig=<ID=chr1,length=200>
 7 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
 8 | ref	1	.	A	T	.	PASS	.	GT
 9 | ref	1	.	A	T	.	PASS	.	GT
10 | ref	1	.	A	G	.	PASS	.	GT
11 | ref	1	.	A	G	.	PASS	.	GT
12 | ref	1	.	A	C	.	PASS	.	GT
13 | ref	1	.	A	C,T	.	PASS	.	GT
14 | ref	50	.	G	T	.	PASS	.	GT
15 | ref	50	.	G	T	.	PASS	.	GT
16 | ref	50	.	G	A	.	PASS	.	GT
17 | ref	50	.	G	A	.	PASS	.	GT
18 | ref	50	.	G	C	.	PASS	.	GT
19 | ref	199	.	G	T	.	PASS	.	GT
20 | ref	199	.	G	T	.	PASS	.	GT
21 | ref	199	.	G	C	.	PASS	.	GT
22 | ref	199	.	G	C	.	PASS	.	GT
23 | ref	199	.	G	A	.	PASS	.	GT
24 | chr1	30	.	C	T	.	PASS	.	GT
25 | chr1	30	.	C	T	.	PASS	.	GT
26 | 


--------------------------------------------------------------------------------
/src/graph/graph_search.h:
--------------------------------------------------------------------------------
 1 | #ifndef GRAPH_SEARCH_H_
 2 | #define GRAPH_SEARCH_H_
 3 | 
 4 | #include "cortex_types.h"
 5 | #include "binary_kmer.h"
 6 | #include "graph_file_reader.h"
 7 | 
 8 | //
 9 | // Search a sorted graph file on disk
10 | //
11 | 
12 | typedef struct GraphFileSearch GraphFileSearch;
13 | 
14 | GraphFileSearch *graph_search_new(GraphFileReader *file);
15 | void graph_search_destroy(GraphFileSearch *gs);
16 | 
17 | bool graph_search_find(GraphFileSearch *gs, BinaryKmer bkey,
18 |                        Covg *covgs, Edges *edges);
19 | 
20 | void graph_search_fetch(GraphFileSearch *gs, size_t idx,
21 |                         BinaryKmer *bkey, Covg *covgs, Edges *edges);
22 | 
23 | void graph_search_rand(GraphFileSearch *gs,
24 |                        BinaryKmer *bkey, Covg *covgs, Edges *edges);
25 | 
26 | #endif /* GRAPH_SEARCH_H_ */
27 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/notes.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | Requires python and uses the bundled version of bfc. SGA assembly can also be
 3 | generated if you have it installed (e.g. with `brew install science/sga`).
 4 | 
 5 | Specify the absolute path to the reference FASTA:
 6 | 
 7 |     REF=../data/chr22/chr22_28M_29M.fa
 8 |     # OR
 9 |     REF=../data/chr22/chr22_17M_18M.fa
10 |     # Then convert to absolute path
11 |     REF=$(cd "$(dirname "$REF")"; pwd)/$(basename "$REF")
12 | 
13 | Sample reads and assemble with:
14 | 
15 |     make REF=$REF
16 | 
17 | Run SGA assembly
18 | 
19 |     ./sga-all-kmers.sh stocherr_cov/sga data/stocherr_cov.1.fq.gz data/stocherr_cov.2.fq.gz $REF
20 |     ./sga-all-kmers.sh stocherr_corr/sga data/stocherr_corr.1.fq.gz data/stocherr_corr.2.fq.gz $REF
21 | 
22 | Generate plots and tables with:
23 | 
24 |     cd results
25 |     ./generate-results.sh
26 | 


--------------------------------------------------------------------------------
/scripts/R/plot-length-hist.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript --vanilla
 2 | 
 3 | # Plot unitig length histograms generated by e.g. 'mccortex31 clean --length-before out.csv ...'
 4 | # input csv should have the columns: 'bp' and 'Count'
 5 | #
 6 | args <- commandArgs(trailingOnly=TRUE)
 7 | if(length(args) != 2) {
 8 |   stop("Usage: Rscript --vanilla plot-length-hist.R <lengths.csv> <lengths.pdf>\n")
 9 | }
10 | 
11 | input_csv=args[1]
12 | output_pdf=args[2]
13 | 
14 | library('ggplot2')
15 | 
16 | d=read.csv(file=input_csv,sep=',',as.is=TRUE)
17 | 
18 | p <- ggplot(data=d, aes(bp, Count)) +               # x = 'bp', y = 'Count'
19 |        geom_bar(stat="identity", color="seagreen") + # bar height is Count itself
20 |        scale_y_log10() +                             # log10 y axis
21 |        xlab("Unitig length (bases)") +
22 |        ylab("Number of unitigs") +
23 |        ggtitle("Unitig length distribution") +
24 |        xlim(0,75)                                    # x axis limited to 0..75
25 | 
26 | ggsave(filename=output_pdf, plot=p, width=6, height=6)
27 | 


--------------------------------------------------------------------------------
/tests/vcfcov/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Each test case is in a separate sub-directory
 4 | 
 5 | # calls0: test with empty VCF
 6 | # calls1: overlapping SNPs at ref:1,50,199, chr1:30. Handle missing contig= header.
 7 | # calls2: SNP every base, across two chromosomes
 8 | # calls3: blocks of overlapping variants (on one chrom)
 9 | # calls4: variants exactly k-1 bases apart (on one chrom)
10 | # calls5: test for large indels
11 | 
12 | all:
13 | 	cd calls0 && $(MAKE)
14 | 	cd calls1 && $(MAKE)
15 | 	cd calls2 && $(MAKE)
16 | 	cd calls3 && $(MAKE)
17 | 	cd calls4 && $(MAKE)
18 | 	cd calls5 && $(MAKE)
19 | 	@echo "vcfcov: All looks good."
20 | 
21 | clean:
22 | 	cd calls0 && $(MAKE) clean
23 | 	cd calls1 && $(MAKE) clean
24 | 	cd calls2 && $(MAKE) clean
25 | 	cd calls3 && $(MAKE) clean
26 | 	cd calls4 && $(MAKE) clean
27 | 	cd calls5 && $(MAKE) clean
28 | 
29 | .PHONY: all clean view
30 | 


--------------------------------------------------------------------------------
/scripts/bash/links-median-threshold.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | 
 5 | if [ $# -ne 3 ]; then
 6 |   >&2 echo "usage: $0 <fdr> <k> <tree.csv>" &&
 7 |   >&2 echo "  Pick threshold for cleaning links" && false
 8 | fi
 9 | 
10 | fdr_limit=$1
11 | k=$2
12 | tree_csv=$3
13 | 
14 | maxk=$[ ( ($k + 31) / 32 ) * 32 - 1 ]
15 | DIR=$( cd $( dirname ${BASH_SOURCE[0]} ) && pwd )
16 | CTX="$DIR/../../bin/mccortex $k"
17 | 
18 | thresh=$($CTX linkthresh -q --zero $fdr_limit $[$k+2] $tree_csv;
19 |          $CTX linkthresh -q --zero $fdr_limit $[$k+3] $tree_csv;
20 |          $CTX linkthresh -q --zero $fdr_limit $[$k+4] $tree_csv;
21 |          $CTX linkthresh -q --zero $fdr_limit $[$k+5] $tree_csv;
22 |          $CTX linkthresh -q --zero $fdr_limit $[$k+6] $tree_csv;)
23 | 
24 | # Print all 5 values
25 | echo $thresh;
26 | # Print median
27 | echo $thresh | tr " " "\n" | sort -n | head -3 | tail -1
28 | 


--------------------------------------------------------------------------------
/scripts/build/mccortex:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Exit immediately if a command exits with a non-zero status.
 4 | set -euo pipefail
 5 | 
 6 | function usage {
 7 |   echo "usage: mccortex <K> [<cmd> ...]" >&2
 8 |   echo "  Wrapper to find the correct mccortex binary given kmer size (K)" >&2
 9 |   exit -1
10 | }
11 | 
12 | if [[ $# -lt 1 || !( $1 =~ ^[0-9]+$ ) ]]
13 | then
14 |   usage
15 | fi
16 | 
17 | K=$1
18 | shift
19 | 
20 | if [[ $[ $K & 1 ] -eq 0 || $K -lt 3 ]]
21 | then
22 |   echo "kmer is not odd and greater than 2: $K" >&2
23 |   exit -1
24 | fi
25 | 
26 | MAXK=$[ (($K+31)/32)*32 - 1 ]
27 | PARENTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
28 | CMD=$PARENTDIR/bin/mccortex$MAXK
29 | 
30 | if ! [[ -e $CMD ]]
31 | then
32 |   echo "Error: $CMD not found" >&2
33 |   echo "Please compile mccortex with: 'make MAXK=$MAXK'" >&2
34 |   exit -2
35 | fi
36 | 
37 | # Run
38 | $CMD "$@"
39 | 


--------------------------------------------------------------------------------
/tests/largeK/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | # Run build and clean commands followed by the check command
 4 | # to test we can build valid graphs for random sequence
 5 | # Works for any K value (e.g. K=11, K=39, K=61)
 6 | 
 7 | K=39
 8 | CTXDIR=../..
 9 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1])
10 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
11 | 
12 | GRAPHS=rnd.k$(K).ctx rnd.clean.k$(K).ctx
13 | LOGS=$(addsuffix .log,$(GRAPHS))
14 | TGTS=rnd.fa $(GRAPHS)
15 | 
16 | all: $(TGTS)
17 | 
18 | clean:
19 | 	rm -rf $(TGTS) $(LOGS)
20 | 
21 | rnd.fa:
22 | 	$(DNACAT) -F -n 200 > $@
23 | 
24 | rnd.k$(K).ctx: rnd.fa
25 | 	$(MCCORTEX) build -k $(K) --sample rnd --seq rnd.fa $@ >& $@.log
26 | 	$(MCCORTEX) check -q $@
27 | 
28 | rnd.clean.k$(K).ctx: rnd.k$(K).ctx
29 | 	$(MCCORTEX) clean -o $@ --unitigs=2 $< >& $@.log
30 | 	$(MCCORTEX) check -q $@
31 | 
32 | .PHONY: all clean
33 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls1/john.fa:
--------------------------------------------------------------------------------
 1 | >john
 2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
 3 | >john
 4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
 5 | >john 1A>T 50G>T 199G>T
 6 | tCTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGtCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAtG
 7 | >john chr1 30C>T
 8 | TGGGCCAGTACGGTGAATCCCTGATGATCtGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG
 9 | >john chr1 30C>T
10 | TGGGCCAGTACGGTGAATCCCTGATGATCtGCATAGTTTGTAAGTCAAAATGGCGACCGGTCGGTGGGTGTGTAGAGCAACCGGAAAGCTTGCCTTATAG
11 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/mcrun/analysis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | set -o xtrace
 5 | 
 6 | CTXDIR=../../../../
 7 | 
 8 | function myreadlink() {
 9 |   ( cd $(dirname $1); echo $PWD/$(basename $1); )
10 | }
11 | 
12 | REF=$(myreadlink ../ref/GCF_000016305.1_ASM1630v1_genomic.fna.gz)
13 | MUMMER=$(myreadlink ../mummer/mummer.vcf.gz)
14 | TRUTH=$(myreadlink ../truth/CAV1016.fa)
15 | BCFTOOLS=$(myreadlink $CTXDIR/libs/bcftools/bcftools)
16 | MAPPING_TEST=$(myreadlink $CTXDIR/scripts/analysis/mapping-vars-test.sh)
17 | MUMMER_ISEC=$(myreadlink $CTXDIR/scripts/analysis/mummer-vcf-isec.sh)
18 | 
19 | mkdir -p mapping_truth mummer_isec
20 | 
21 | for vcf in `ls vcfs/*k{61,51}.vcf.gz`; do
22 |   name=`basename $vcf .vcf.gz`
23 |   echo "== $name"
24 |   [ -e $vcf.csi ] || $BCFTOOLS index $vcf
25 |   $MAPPING_TEST $vcf $REF $TRUTH mapping_truth/$name
26 |   $MUMMER_ISEC $MUMMER $vcf mummer_isec/$name >& $name.isec.log
27 | done
28 | 
29 | 


--------------------------------------------------------------------------------
/src/global/cortex_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef CORTEX_TYPES_H_
 2 | #define CORTEX_TYPES_H_
 3 | 
 4 | typedef size_t Colour;
 5 | typedef uint8_t Edges;
 6 | typedef uint32_t Covg;
 7 | 
 8 | #define COVG_MAX UINT_MAX
 9 | 
10 | #define SAFE_ADD_COVG(a,b) ((uint64_t)(a)+(b) > COVG_MAX ? COVG_MAX : (a)+(b))
11 | #define SAFE_SUM_COVG(a,b) ((a) = SAFE_ADD_COVG((a), (b)))
12 | 
13 | typedef uint8_t Orientation;
14 | #define FORWARD 0
15 | #define REVERSE 1
16 | 
17 | typedef uint8_t ReadMateDir;
18 | #define READPAIR_FF 0
19 | #define READPAIR_FR 1
20 | #define READPAIR_RF 2
21 | #define READPAIR_RR 3
22 | // See seq_reader.h to get string representations (MP_DIR_STRS[dir])
23 | 
24 | #define read_mate_r1(r) ((r)&2)
25 | #define read_mate_r2(r) ((r)&1)
26 | 
27 | // don't ever use the top bit of hkey, used later for orientation
28 | typedef uint64_t hkey_t;
29 | 
30 | typedef struct {
31 |   hkey_t orient:1, key:63;
32 | } dBNode;
33 | 
34 | #endif /* CORTEX_TYPES_H_ */
35 | 


--------------------------------------------------------------------------------
/results/file_buffering/file-buffering.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -euo pipefail
 4 | 
 5 | function usage {
 6 |   echo "usage: $0 <chr22.fa>" >&2
 7 |   echo "  Compare buffered vs unbuffered read times" >&2
 8 |   exit -1
 9 | }
10 | 
11 | if [ $# -ne 1 ]; then usage; fi
12 | 
13 | SEQTEST=../../libs/seq_file/benchmarks/seqtest
14 | 
15 | # Load into disk cache
16 | $SEQTEST --no-zlib --no-buf $1 >& /dev/null
17 | 
18 | # Five unbuffered then five buffered runs; print the user time of each in seconds
19 | (
20 | time $SEQTEST --no-zlib --no-buf $1;
21 | time $SEQTEST --no-zlib --no-buf $1;
22 | time $SEQTEST --no-zlib --no-buf $1;
23 | time $SEQTEST --no-zlib --no-buf $1;
24 | time $SEQTEST --no-zlib --no-buf $1;
25 | # Buffered runs. NB: sed must not start with a greedy .* or it eats minute digits
26 | time $SEQTEST --no-zlib          $1;
27 | time $SEQTEST --no-zlib          $1;
28 | time $SEQTEST --no-zlib          $1;
29 | time $SEQTEST --no-zlib          $1;
30 | time $SEQTEST --no-zlib          $1;
31 | ) 2>&1 | grep '^user' | \
32 | sed -E 's/[^0-9]*([0-9]+)m([0-9.]+)s.*/\1 \2/' | \
33 | awk '{print $1*60+$2}'
34 | 


--------------------------------------------------------------------------------
/scripts/bash/mccortex-to-ray.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Exit immediately if a command exits with a non-zero status.
 4 | set -euo pipefail
 5 | 
 6 | cmd=$0
 7 | if [[ $# -ne 1 && $# -ne 2 ]];
 8 | then
 9 |   echo "usage $cmd <input.ctx[:colour]>"
10 |   exit -1;
11 | fi
12 | 
13 | DIR=$( cd $( dirname ${BASH_SOURCE[0]} ) && cd .. && pwd )
14 | CTX="$DIR/bin/mccortex31 view"
15 | 
16 | col=0
17 | if [[ $# -eq 2 ]]
18 | then
19 |   col=$2
20 | 
21 |   # Check number of colours in binary
22 |   bincols=`$CTX --info $1 | grep 'colours:' | grep -o '[0-9]*$'` || exit
23 |   if [[ $col -ge $bincols ]]
24 |   then
25 |     echo "Binary only has $bincols colours (you requested $col)"
26 |     exit -1
27 |   fi
28 | fi
29 | 
30 | $CTX --print_kmers $1 | awk 'BEGIN { col='$col' } {
31 |   covg=$(2+col);
32 |   edges=$(2+col+(NF-1)/2);
33 |   x=substr(edges,0,4); y=substr(edges,5,8);
34 |   gsub("\\.","",x); gsub("\\.","",y);
35 |   print $1";"covg";"toupper(x)";"y
36 | }'
37 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls0/truth.cov.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20151014
 4 | ##reference=ref/ref.fa
 5 | ##contig=<ID=ref,length=200>
 6 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 7 | ##INFO=<ID=HK21R,Number=A,Type=Integer,Description="Num. ref kmers unique in haplotypes (k=21)">
 8 | ##INFO=<ID=HK21A,Number=A,Type=Integer,Description="Num. alt kmers unique in haplotypes (k=21)">
 9 | ##FORMAT=<ID=NK21R,Number=A,Type=Integer,Description="Number of exclusive kmers on ref found for each allele (k=21)">
10 | ##FORMAT=<ID=CK21R,Number=A,Type=Integer,Description="Mean ref coverage for found kmers (k=21)">
11 | ##FORMAT=<ID=NK21A,Number=A,Type=Integer,Description="Number of exclusive kmers on alt found for each allele (k=21)">
12 | ##FORMAT=<ID=CK21A,Number=A,Type=Integer,Description="Mean alt coverage for found kmers (k=21)">
13 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	MasterGrunt	John	Jane
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/cleaning.corr.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph
 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs
 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs
 6 | 21,943524,1041105,943524,943614,943524,0.09373,0.00010,1.00000
 7 | 31,973199,1184735,973199,973203,973199,0.17855,0.00000,1.00000
 8 | 41,985939,1265236,985939,985939,985939,0.22075,0.00000,1.00000
 9 | 51,991526,1292210,991526,991526,991526,0.23269,0.00000,1.00000
10 | 61,994205,1284937,994205,994205,994205,0.22626,0.00000,1.00000
11 | 71,995708,1249759,995708,995708,995708,0.20328,0.00000,1.00000
12 | 81,996639,1188147,996639,996638,996638,0.16118,0.00000,0.99999
13 | 91,997273,1102712,997215,994901,994896,0.09567,0.00001,0.97849
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170206mon_chr22_28M/cleaning.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph
 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs
 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs
 6 | 21,943524,8281513,943524,944420,943506,0.88607,0.00097,1.00000
 7 | 31,973199,10505480,973199,973255,973151,0.90736,0.00011,0.99999
 8 | 41,985939,11685959,985939,985901,985901,0.91563,0.00000,1.00000
 9 | 51,991526,11925083,991526,991494,991494,0.91685,0.00000,1.00000
10 | 61,994205,11290786,994205,994174,994174,0.91195,0.00000,1.00000
11 | 71,995708,9832665,995706,995658,995658,0.89873,0.00000,0.99999
12 | 81,996639,7598955,996636,995784,995784,0.86885,0.00000,0.99987
13 | 91,997273,4637375,995584,409278,409248,0.78531,0.00007,0.86132
14 | 


--------------------------------------------------------------------------------
/scripts/bash/vcf-longest-haplotype.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -euo pipefail
 4 | 
 5 | cmd=$0
 6 | if [[ $# -ne 1 ]]; then
 7 |   echo "usage: $cmd <in.vcf>"
 8 |   exit -1
 9 | fi
10 | 
11 | if ! [[ -e $1 ]]; then
12 |   echo "Cannot read $1"
13 |   exit -1
14 | fi
15 | 
16 | STATS=`grep -v '^#' $1 | awk '{print $5}' | tr ',' '
17 | ' | awk '{print length}' | sort -n | awk 'BEGIN{max=0; sum=0;}
18 | { values[NR]=$1; sum += $1; if ( $1 > max ) { max = $1; } }
19 | END{
20 |   median = (NR % 2) ? values[(NR + 1) / 2] \
21 |                     : (values[(NR / 2)] + values[(NR / 2) + 1]) / 2.0;
22 |   print sum" "NR" "max" "sprintf("%.1f", sum/NR)" "median
23 | }'`
24 | 
25 | sum=`echo $STATS | cut -d' ' -f1`
26 | num=`echo $STATS | cut -d' ' -f2`
27 | max=`echo $STATS | cut -d' ' -f3`
28 | mean=`echo $STATS | cut -d' ' -f4`
29 | median=`echo $STATS | cut -d' ' -f5`
30 | 
31 | echo "[Haplotype length (bp)] longest: $max; mean: $mean; median: $median  \
32 | [$num alleles; $sum bp total]"
33 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/cleaning.corr.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph
 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs
 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs
 6 | 21,943524,1041105,943524,943614,943524,0.09373,0.00010,1.00000
 7 | 31,973199,1184735,973199,973203,973199,0.17855,0.00000,1.00000
 8 | 41,985939,1265236,985939,985939,985939,0.22075,0.00000,1.00000
 9 | 51,991526,1292210,991526,991526,991526,0.23269,0.00000,1.00000
10 | 61,994205,1284937,994205,994205,994205,0.22626,0.00000,1.00000
11 | 71,995708,1249759,995708,995708,995708,0.20328,0.00000,1.00000
12 | 81,996639,1188147,996639,996638,996638,0.16118,0.00000,0.99999
13 | 91,997273,1102712,997215,994901,994896,0.09567,0.00001,0.97849
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_confidstep/cleaning.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph
 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs
 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs
 6 | 21,943524,8281513,943524,944420,943506,0.88607,0.00097,1.00000
 7 | 31,973199,10505480,973199,973255,973151,0.90736,0.00011,0.99999
 8 | 41,985939,11685959,985939,985901,985901,0.91563,0.00000,1.00000
 9 | 51,991526,11925083,991526,991494,991494,0.91685,0.00000,1.00000
10 | 61,994205,11290786,994205,994174,994174,0.91195,0.00000,1.00000
11 | 71,995708,9832665,995706,995658,995658,0.89873,0.00000,0.99999
12 | 81,996639,7598955,996636,995784,995784,0.86885,0.00000,0.99987
13 | 91,997273,4637375,995584,409278,409248,0.78531,0.00007,0.86132
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/cleaning.corr.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph
 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs
 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs
 6 | 21,943524,1041105,943524,943614,943524,0.09373,0.00010,1.00000
 7 | 31,973199,1184735,973199,973203,973199,0.17855,0.00000,1.00000
 8 | 41,985939,1265236,985939,985939,985939,0.22075,0.00000,1.00000
 9 | 51,991526,1292210,991526,991526,991526,0.23269,0.00000,1.00000
10 | 61,994205,1284937,994205,994205,994205,0.22626,0.00000,1.00000
11 | 71,995708,1249759,995708,995708,995708,0.20328,0.00000,1.00000
12 | 81,996639,1188147,996639,996638,996638,0.16118,0.00000,0.99999
13 | 91,997273,1102712,997215,994901,994896,0.09567,0.00001,0.97849
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/20170211sat_chr22_28M_nomissing/cleaning.table.csv:
--------------------------------------------------------------------------------
 1 | # Number of kmers in the perfect, raw and cleaned graphs
 2 | # _nreal is the number of real kmers in the raw/cleaned graph
 3 | # raw_errs, clean_errs are the fraction of error kmers in each graph
 4 | # frac_remove_errs is the fraction of kmers removed that were seqn errs
 5 | kmer,nkmers,raw_nkmers,raw_nreal,clean_nkmers,clean_nreal,raw_errs,clean_errs,frac_remove_errs
 6 | 21,943524,8281513,943524,944420,943506,0.88607,0.00097,1.00000
 7 | 31,973199,10505480,973199,973255,973151,0.90736,0.00011,0.99999
 8 | 41,985939,11685959,985939,985901,985901,0.91563,0.00000,1.00000
 9 | 51,991526,11925083,991526,991494,991494,0.91685,0.00000,1.00000
10 | 61,994205,11290786,994205,994174,994174,0.91195,0.00000,1.00000
11 | 71,995708,9832665,995706,995658,995658,0.89873,0.00000,0.99999
12 | 81,996639,7598955,996636,995784,995784,0.86885,0.00000,0.99987
13 | 91,997273,4637375,995584,409278,409248,0.78531,0.00007,0.86132
14 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/count-links.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Extract link statistics from McCortex log output (files given as arguments,
 4 | # or STDIN). Prints a tab-separated table with one row per "kmers-with-paths"
 5 | # log line, using the kmer-size and graph kmer count seen since the last row.
 6 | 
 7 | use strict;
 8 | use warnings;
 9 | 
10 | my $nlines = 0;
11 | my ($kmersize,$total_kmers) = (undef,undef);
12 | 
13 | print join("\t", ('K', 'n_graph_kmers', 'n_link_kmers', 'n_links', 'link_junction_mem'))."\n";
14 | 
15 | while(my $line = <>) {
16 |   chomp($line);
17 |   if($line =~ /\[graph\] kmer-size: ([0-9]+)/i) {
18 |     $kmersize = $1;
19 |   }
20 |   elsif($line =~ /\[GReader\] ([0-9,]+) kmers, .* filesize/i) {
21 |     $total_kmers = $1;
22 |     $total_kmers =~ s/,//g;
23 |   }
24 |   # No /g modifier: one match per line is wanted, /g only adds pos() state
25 |   elsif($line =~ /kmers-with-paths: ([0-9,]+), num paths: ([0-9,]+), path-bytes: (.*)/i) {
26 |     my ($nkmers,$nlinks,$linkmem) = ($1,$2,$3);
27 |     s/,//g for ($nkmers,$nlinks,$linkmem);
28 |     # The header lines can be missing (e.g. truncated log): say so and print NA
29 |     # rather than interpolating undef into the row
30 |     if(!defined($kmersize) || !defined($total_kmers)) {
31 |       warn "[$0] line $.: link stats without preceding kmer-size / kmer count\n";
32 |     }
33 |     my $k_str  = defined($kmersize)    ? $kmersize    : 'NA';
34 |     my $nk_str = defined($total_kmers) ? $total_kmers : 'NA';
35 |     print "$k_str\t$nk_str\t$nkmers\t$nlinks\t$linkmem\n";
36 |     # Reset so values from one graph are never reused for the next row
37 |     ($kmersize,$total_kmers) = (undef,undef);
38 |   }
39 |   $nlines++;
40 | }
41 | 
42 | print STDERR "[$0] read $nlines lines\n";
43 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/sga-all-kmers.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Run sga.sh for every kmer size in KMERS, then score each assembly against <ref>
 3 | set -eou pipefail
 4 | set -x
 5 | 
 6 | if [ $# != 4 ]
 7 | then
 8 |   ( >&2 echo "usage: $0 <outdir> <in.1.fq> <in.2.fq> <ref>" )
 9 |   exit -1
10 | fi
11 | 
12 | function abspath {
13 |   echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
14 | }
15 | 
16 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
17 | CTXDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd ../.. && pwd )"
18 | 
19 | DNACAT="${CTXDIR}/libs/seq_file/bin/dnacat"
20 | PY_BREAK_VS_TRUTH="${CTXDIR}/scripts/python/break-contigs-vs-truth.py"
21 | 
22 | outdir="$1"
23 | IN1="$(abspath "$2")"
24 | IN2="$(abspath "$3")"
25 | REF="$(abspath "$4")"
26 | 
27 | KMERS="21 31 41 51 61 71 81 91"
28 | # Inputs were made absolute above because we change directory here
29 | mkdir -p "$outdir"
30 | cd "$outdir"
31 | 
32 | "$DIR/sga.sh" "$IN1" "$IN2" "$KMERS"
33 | # NOTE(review): the first script argument is fixed at 21 for every k - confirm intended
34 | for k in $KMERS
35 | do
36 |   "$DNACAT" -P "$REF" | "$PY_BREAK_VS_TRUTH" 21 "k$k/assemble.m${k}-contigs.fa" > "k$k/stats.k${k}.out" 2> "k$k/stats.k${k}.txt"
37 | done
38 | 
39 | cd ..
40 | 


--------------------------------------------------------------------------------
/src/graph/prune_nodes.h:
--------------------------------------------------------------------------------
 1 | #ifndef PRUNE_NODES_H_
 2 | #define PRUNE_NODES_H_
 3 | 
 4 | //
 5 | // Pruning nodes from the graph
 6 | //
 7 | #include "cortex_types.h"
 8 | #include "db_node.h"
 9 | 
10 | // Remove a node from the graph, do not edit any edges / adjacent nodes
11 | // Threadsafe
12 | void prune_node_without_edges_mt(dBGraph *db_graph, hkey_t hkey);
13 | 
14 | void prune_node(dBGraph *db_graph, hkey_t node);
15 | 
16 | // Unitig pruning used by ctx_clean
17 | void prune_unitig(dBNode *nodes, size_t len, dBGraph *db_graph);
18 | 
19 | // Used by ctx_subgraph.c, clean_graph.c
20 | // flags is a bit array, one bit per kmer
21 | void prune_nodes_lacking_flag(size_t num_threads, const uint8_t *flags,
22 |                               dBGraph *db_graph);
23 | 
24 | // Currently unused
25 | // remove nodes if not in any colour
26 | // i.e. db_node_has_col(graph,node,colour) == false for all colours
27 | void prune_uncoloured_nodes(dBGraph *db_graph);
28 | 
29 | #endif /* PRUNE_NODES_H_ */
30 | 


--------------------------------------------------------------------------------
/libs/cJSON/tests/test5:
--------------------------------------------------------------------------------
 1 | {"menu": {
 2 |     "header": "SVG Viewer",
 3 |     "items": [
 4 |         {"id": "Open"},
 5 |         {"id": "OpenNew", "label": "Open New"},
 6 |         null,
 7 |         {"id": "ZoomIn", "label": "Zoom In"},
 8 |         {"id": "ZoomOut", "label": "Zoom Out"},
 9 |         {"id": "OriginalView", "label": "Original View"},
10 |         null,
11 |         {"id": "Quality"},
12 |         {"id": "Pause"},
13 |         {"id": "Mute"},
14 |         null,
15 |         {"id": "Find", "label": "Find..."},
16 |         {"id": "FindAgain", "label": "Find Again"},
17 |         {"id": "Copy"},
18 |         {"id": "CopyAgain", "label": "Copy Again"},
19 |         {"id": "CopySVG", "label": "Copy SVG"},
20 |         {"id": "ViewSVG", "label": "View SVG"},
21 |         {"id": "ViewSource", "label": "View Source"},
22 |         {"id": "SaveAs", "label": "Save As"},
23 |         null,
24 |         {"id": "Help"},
25 |         {"id": "About", "label": "About Adobe CVG Viewer..."}
26 |     ]
27 | }}
28 | 


--------------------------------------------------------------------------------
/scripts/report/make-link-plot.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -eou pipefail
 3 | 
 4 | if [[ $# -ne 3 ]]; then
 5 |   echo "usage: $0 <link-cov-heatmap.R> <in.csv> <out.pdf>" 1>&2
 6 |   exit -1
 7 | fi
 8 | 
 9 | #in:  data/sample.kK.se.links.csv
10 | #out: plots/sample.kK.se.links.pdf
11 | script="$1"
12 | in="$2"
13 | out="$3"
14 | 
15 | # Kmer size is read from the kN part of the input path. Only the last match is
16 | # kept so that a kN directory component cannot produce several values.
17 | KMER=$(echo "$in" | grep -oE 'k[0-9]+' | grep -oE '[0-9]+' | tail -n 1)
18 | # Side files sit next to the input and share its name prefix
19 | CUTOFFFILE=$(echo "$in" | awk '{gsub(/\.links\.csv$/,".links.thresh")}1')
20 | KCOVFILE=$(echo "$in" | awk '{gsub(/\.(se|pe)\.links\.csv$/,".kmercov")}1')
21 | READLENFILE=$(echo "$in" | awk '{gsub(/\.(se|pe)\.links\.csv$/,".readlen")}1')
22 | 
23 | # Each side file is optional: fall back to 0 when it is missing or unreadable
24 | CUTOFF=$( ([[ -e "$CUTOFFFILE" ]] && cat "$CUTOFFFILE") || echo 0 )
25 | KCOV=$( ([[ -e "$KCOVFILE" ]] && cat "$KCOVFILE") || echo 0 )
26 | READLEN=$( ([[ -e "$READLENFILE" ]] && cat "$READLENFILE") || echo 0 )
27 | 
28 | echo KMER="$KMER"
29 | echo CUTOFFFILE="$CUTOFFFILE"
30 | echo KCOVFILE="$KCOVFILE"
31 | echo READLENFILE="$READLENFILE"
32 | 
33 | set -o xtrace
34 | "$script" "$in" "$out" "$CUTOFF" "$KMER" "$KCOV" "$READLEN"
35 | 


--------------------------------------------------------------------------------
/src/basic/hash_mem.h:
--------------------------------------------------------------------------------
 1 | #ifndef HASH_MEM_H_
 2 | #define HASH_MEM_H_
 3 | 
 4 | #define REHASH_LIMIT 20
 5 | #define IDEAL_OCCUPANCY 0.75f
 6 | #define WARN_OCCUPANCY 0.9f
 7 | // bucket size must be <256
 8 | #define MAX_BUCKET_SIZE 48
 9 | 
10 | // A table of nbkts buckets, each holding bktsize entries of nbits bits.
11 | // Bytes needed: (bktsize*nbkts*nbits)/8 for the entries + 2 bytes per bucket
12 | static inline size_t ht_mem(size_t bktsize, size_t nbkts, size_t nbits) {
13 |   return nbkts * sizeof(uint8_t[2]) + (bktsize * nbkts * nbits) / 8;
14 | }
15 | 
16 | // Returns capacity of a hash table that holds at least nkmers
17 | size_t hash_table_cap(uint64_t nkmers, uint64_t *num_bkts_ptr, uint8_t *bkt_size_ptr);
18 | 
19 | // Returns memory required to hold nkmers
20 | size_t hash_table_mem(uint64_t nkmers, size_t entrybits, uint64_t *nkmers_ptr);
21 | 
22 | // Returns memory used for hashtable no more than some memory limit
23 | size_t hash_table_mem_limit(size_t memlimit, size_t entrybits, uint64_t *nkmers_ptr);
24 | 
25 | #endif /* HASH_MEM_H_ */
26 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls3/calls.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20151014
 4 | ##reference=ref/ref.fa
 5 | ##contig=<ID=ref,length=200>
 6 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 7 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
 8 | ref	2	.	C	GCAAGCTTTTCTAATTCGTAT	.	PASS	.	.
 9 | ref	10	.	A	C,GCAAGCTTTTCTAATTCGTAT	.	PASS	.	.
10 | ref	11	.	A	GCAAGCTTTTCTAATTCGTAT	.	PASS	.	.
11 | ref	17	.	A	G	.	PASS	.	.
12 | ref	31	.	A	T	.	PASS	.	.
13 | ref	32	.	C	T	.	PASS	.	.
14 | ref	33	.	G	A	.	PASS	.	.
15 | ref	35	.	C	G	.	PASS	.	.
16 | ref	54	.	A	G	.	PASS	.	.
17 | ref	56	.	G	C	.	PASS	.	.
18 | ref	80	.	C	T	.	PASS	.	.
19 | ref	85	.	G	A	.	PASS	.	.
20 | ref	90	.	C	G	.	PASS	.	.
21 | ref	91	.	A	GCAAGCTTTTCTAATTCGTAT . PASS	. .
22 | ref	120	.	G	A	.	PASS	.	.
23 | ref	122	.	G	T	.	PASS	.	.
24 | ref	140	.	A	C	.	PASS	.	.
25 | ref	142	.	G	A	.	PASS	.	.
26 | ref	144	.	C	T	.	PASS	.	.
27 | ref	146	.	C	A	.	PASS	.	.
28 | ref	146	.	C	G	.	PASS	.	.
29 | ref	167	.	T	C	.	PASS	.	.
30 | 


--------------------------------------------------------------------------------
/results/var_calling_10ecoli/about.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | # Cortex_var only compiles and runs cleanly on Linux
 3 | # Dependencies: Cortex_var, vcf-hack, stampy, vcftools
 4 | 
 5 | Simulate a diploid with 60X coverage (30X per chrom). 1% seq error and
 6 | 100bp read length, with paired end reads (insert length ~250bp, stddev = 50).
 7 | 
 8 | Diploid is simulated by mutating a ref twice to create two chromosomes. We then
 9 | call variants with the bubble caller including the reference. Comparing against
10 | the truth gives us a FDR and sensitivity measurement.
11 | 
12 | Run simulation with k=21 and k=31:
13 | 
14 |     make K=21 run-mccortex run-cortex >& results/20150428.tues.k21.linux.txt
15 |     make K=31 run-mccortex run-cortex >& results/20150428.tues.k31.linux.txt
16 | 
17 | Re-run again with read lengths of 150bp:
18 | 
19 |     make K=21 run-mccortex run-cortex >& results/20150429.wed.k21.linux.txt
20 |     make K=31 run-mccortex run-cortex >& results/20150429.wed.k31.linux.txt
21 | 
22 | Clean with:
23 | 
24 |     make clean K=21
25 |     make clean K=31
26 | 


--------------------------------------------------------------------------------
/src/tools/pop_bubbles.h:
--------------------------------------------------------------------------------
 1 | #ifndef POP_BUBBLES_H_
 2 | #define POP_BUBBLES_H_
 3 | 
 4 | #include "db_graph.h"
 5 | 
 6 | typedef struct
 7 | {
 8 |   int max_rmv_covg, max_rmv_klen, max_rmv_kdiff;
 9 | } PopBubblesPrefs;
10 | 
11 | /**
12 |  * visited, rmvbits should each have at least db_graph->capacity bits
13 |  * and should be initialised to zeros
14 |  * rmvbits will have bits set for all nodes that should be removed
15 |  * @param max_rmv_covg only remove contigs with mean covg <= max_rmv_covg,
16 |  *                     ignored if <= 0.
17 |  * @param max_rmv_klen only remove contigs with num kmers <= max_rmv_klen,
18 |  *                     ignored if <= 0.
19 |  * @param max_rmv_kdiff only remove contigs if max diff in kmers <= max_rmv_kdiff,
20 |  *                      ignored if < 0.
21 |  * @return number of bubbles popped
22 | **/
23 | size_t pop_bubbles(const dBGraph *db_graph, size_t nthreads,
24 |                    PopBubblesPrefs prefs,
25 |                    uint8_t *visited, uint8_t *rmvbits);
26 | 
27 | #endif /* POP_BUBBLES_H_ */
28 | 


--------------------------------------------------------------------------------
/results/var_calling_diploid_chr22_1Mbp/about.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | # Cortex_var only compiles and runs cleanly on Linux
 3 | # Dependencies: Cortex_var, vcf-hack, stampy, vcftools
 4 | 
 5 | Simulate a diploid with 60X coverage (30X per chrom). 1% seq error and
 6 | 100bp read length, with paired end reads (insert length ~250bp, stddev = 50).
 7 | 
 8 | Diploid is simulated by mutating a ref twice to create two chromosomes. We then
 9 | call variants with the bubble caller including the reference. Comparing against
10 | the truth gives us a FDR and sensitivity measurement.
11 | 
12 | Run simulation with k=21 and k=31:
13 | 
14 |     make K=21 run-mccortex run-cortex >& results/20150428.tues.k21.linux.txt
15 |     make K=31 run-mccortex run-cortex >& results/20150428.tues.k31.linux.txt
16 | 
17 | Re-run again with read lengths of 150bp:
18 | 
19 |     make K=21 run-mccortex run-cortex >& results/20150429.wed.k21.linux.txt
20 |     make K=31 run-mccortex run-cortex >& results/20150429.wed.k31.linux.txt
21 | 
22 | Clean with:
23 | 
24 |     make clean K=21
25 |     make clean K=31
26 | 


--------------------------------------------------------------------------------
/travis/install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # Only run if we are not doing Coverity Scan analysis
 6 | # The COVERITY_SCAN_BRANCH environment variable is not set until AFTER install
 7 | # step has run, so we do a check on which git branch we have
 8 | if [ "$(git rev-parse --abbrev-ref HEAD)" != "coverity_scan" ]
 9 | then
10 | 
11 |   # Compile third party code (a plain command, so set -e stops on a failed build)
12 |   make -C libs
13 | 
14 |   # Set up cpanm, install JSON perl package
15 |   # Using default ~/perl5 local directory
16 |   curl -L https://cpanmin.us | perl - App::cpanminus
17 |   ~/perl5/bin/cpanm --force --local-lib=~/perl5 local::lib && eval "$(perl -I ~/perl5/lib/perl5/ -Mlocal::lib)"
18 |   ~/perl5/bin/cpanm --force --local-lib=~/perl5 JSON
19 | 
20 |   # Set up installing perl modules library path with:
21 |   # eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"
22 |   echo '[ $SHLVL -eq 1 ] && eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"' >> ~/.bashrc
23 |   echo '[ $SHLVL -eq 1 ] && eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"' >> ~/.profile
24 | 
25 | fi
26 | 


--------------------------------------------------------------------------------
/scripts/analysis/sam-count-vars.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Takes input from sam-count-bases e.g.:
 4 | # NC_009648.1 21 A 66 0 0 66 0
 5 | # Used columns: 3rd = ref base, 4th = total coverage, 5th-8th = coverage of A,C,G,T
 6 | use strict;
 7 | use warnings;
 8 | use List::Util qw( reduce );
 9 | 
10 | my @bases = ('A', 'C', 'G', 'T'); # fixed order => ties are broken reproducibly
11 | my $ncalls = 0;
12 | my $ngood_calls = 0;
13 | my $min_cov_frac = 0.9;
14 | 
15 | for($ncalls = 0; defined(my $line = <>); $ncalls++) {
16 |   chomp($line);
17 |   my @cols = split(' ', $line); # ' ' splits on runs of whitespace, no empty fields
18 |   my $ref_base = $cols[2];
19 |   my $tot_cov = $cols[3];
20 |   my %cov; @cov{@bases} = @cols[4..7];
21 |   my $max_base = reduce { $cov{$a} > $cov{$b} ? $a : $b } @bases;
22 |   if($max_base ne $ref_base && $cov{$max_base} >= $min_cov_frac*$tot_cov) {
23 |     $ngood_calls++;
24 |     print "".join("\t", "GOOD", @cols, $max_base)."\n";
25 |   } else {
26 |     print "".join("\t", "BAD", @cols, $max_base)."\n";
27 |   }
28 | }
29 | 
30 | print "$ngood_calls / $ncalls (" .
31 |       sprintf("%.2f", $ncalls ? (100*$ngood_calls)/$ncalls : 0) .
32 |       "%)\n";
33 | 


--------------------------------------------------------------------------------
/src/graph/graph_format.c:
--------------------------------------------------------------------------------
 1 | #include "global.h"
 2 | #include "graph_format.h"
 3 | 
 4 | void graph_header_capacity(GraphFileHeader *h, size_t num_of_cols)
 5 | {
 6 |   size_t col;
 7 |   // Grow-only: nothing to do when the header already has room for num_of_cols
 8 |   if(num_of_cols <= h->capacity) return;
 9 |   h->ginfo = ctx_recallocarray(h->ginfo, h->capacity, num_of_cols, sizeof(GraphInfo));
10 |   // Only the newly added entries need graph_info_alloc()
11 |   for(col = h->capacity; col < num_of_cols; col++)
12 |     graph_info_alloc(&h->ginfo[col]);
13 |   h->capacity = num_of_cols;
14 | }
15 | 
16 | void graph_header_dealloc(GraphFileHeader *h)
17 | {
18 |   size_t col, ncols = h->capacity;
19 |   for(col = 0; col < ncols; col++)
20 |     graph_info_dealloc(&h->ginfo[col]);
21 |   ctx_free(h->ginfo);
22 |   memset(h, 0, sizeof(GraphFileHeader)); // header is left zeroed after release
23 | }
24 | 
25 | void graph_header_print(const GraphFileHeader *header)
26 | {
27 |   // Same text as printing field by field, emitted with a single call
28 |   printf("HEADER\n  version: %u\n  kmer_size: %u\n"
29 |          "  num_of_bitfields: %u\n  num_of_cols: %u\n"
30 |          "  [capacity: %zu]\n",
31 |          header->version, header->kmer_size,
32 |          header->num_of_bitfields, header->num_of_cols, header->capacity);
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls4/pluto.fa:
--------------------------------------------------------------------------------
 1 | >ref_ref
 2 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
 3 | >ref_ref
 4 | ACTATGGCCAAAGAGCAATACCCCCCGATGACGGCTAGGTTGTTTTTCGGCTCAAGACTCTATCCTGCGGACCGTTCCGCAGGCGTGCCCAGCACCAGGGTCCGTACATTAATACCGTCGCGACTTACTTATTAAGCGTAGGGCACAGCAATATTTCCGCTGGCCCTTACAACCTAGTTTGTCCATAGAGCCATCATAGG
 5 | >ref_alt
 6 | ACTATGtCCAAAGAGCAATACCCCCCGcTGACGGCTAGGTTGTTTTTCcgatCTCAAGACTCTATCCTGCGGgatCCGTTCCGCAGGCGTGCCCAacTCCGTACATTAATACCGTCGgGACTTACTTATTAAGCGTAGttactcttcATATTTCCGCTGGCCCTTACctagACCTAGTTTGTCCATAGAGCaG
 7 | >ref_alt
 8 | ACTATGtCCAAAGAGCAATACCCCCCGcTGACGGCTAGGTTGTTTTTCcgatCTCAAGACTCTATCCTGCGGgatCCGTTCCGCAGGCGTGCCCAacTCCGTACATTAATACCGTCGgGACTTACTTATTAAGCGTAGttactcttcATATTTCCGCTGGCCCTTACctagACCTAGTTTGTCCATAGAGCaG
 9 | >ref_alt
10 | ACTATGtCCAAAGAGCAATACCCCCCGcTGACGGCTAGGTTGTTTTTCcgatCTCAAGACTCTATCCTGCGGgatCCGTTCCGCAGGCGTGCCCAacTCCGTACATTAATACCGTCGgGACTTACTTATTAAGCGTAGttactcttcATATTTCCGCTGGCCCTTACctagACCTAGTTTGTCCATAGAGCaG
11 | 


--------------------------------------------------------------------------------
/src/basic/decomp_breakpoint.h:
--------------------------------------------------------------------------------
 1 | #ifndef DECOMP_BREAKPOINT_H_
 2 | #define DECOMP_BREAKPOINT_H_
 3 | 
 4 | #include "aligned_call.h"
 5 | #include "call_file_reader.h"
 6 | #include "seq_reader.h" // genome hash
 7 | 
 8 | typedef struct {
 9 |   uint64_t nflanks_not_uniquely_mapped, nflanks_diff_chroms;
10 |   uint64_t nflanks_diff_strands, nflanks_overlap_too_much;
11 |   uint64_t ncalls, ncalls_mapped;
12 | } DecompBreakpointStats;
13 | 
14 | typedef struct DecompBreakpointStruct DecompBreakpoint;
15 | 
16 | DecompBreakpoint* decomp_brkpt_init();
17 | void decomp_brkpt_destroy(DecompBreakpoint *bd);
18 | 
19 | void decomp_brkpt_cpy_stats(DecompBreakpointStats *stats,
20 |                             const DecompBreakpoint *bd);
21 | 
22 | // Convert a call into an aligned call
23 | // return 0 on success, otherwise non-zero on failure
24 | int decomp_brkpt_call(DecompBreakpoint *db,
25 |                       ChromHash *genome, size_t nsamples,
26 |                       const CallFileEntry *centry,
27 |                       AlignedCall *ac);
28 | 
29 | #endif /* DECOMP_BREAKPOINT_H_ */
30 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/freebayes/freebayes.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | set -o xtrace
 5 | 
 6 | REF=/data2/users/turner/cortex_sims/klebsiella/kleb_pneumoniae/ref/GCF_000016305.1_ASM1630v1_genomic.fa
 7 | BAM=/data2/users/turner/cortex_sims/klebsiella/kleb_pneumoniae/remap/mapped/KlebPneu.bam
 8 | BAMRMDUP=/data2/users/turner/cortex_sims/klebsiella/kleb_pneumoniae/remap/mapped/KlebPneu.rmdup.bam
 9 | 
10 | CTXDIR=~/mccortex
11 | FREEBAYES=~/bioinf/freebayes/bin/freebayes
12 | BGZIP=$CTXDIR/libs/htslib/bgzip
13 | BCFTOOLS=$CTXDIR/libs/bcftools/bcftools
14 | # Last stage of the normalisation pipeline below. It was used without being
15 | # defined, which aborts the script under `set -u`.
16 | # NOTE(review): path assumed from the scripts/bash layout - confirm it exists.
17 | VCF_PASS=$CTXDIR/scripts/bash/vcf-pass
18 | 
19 | $FREEBAYES -f $REF -p 1 $BAMRMDUP > freebayes.rmdup.vcf
20 | $BGZIP freebayes.rmdup.vcf
21 | $BCFTOOLS index freebayes.rmdup.vcf.gz
22 | 
23 | $BCFTOOLS norm --check-ref x -m -any --fasta-ref $REF --site-win 5000 freebayes.rmdup.vcf.gz | \
24 |   $BCFTOOLS norm --rm-dup any --do-not-normalize | \
25 |   $VCF_PASS > freebayes.vcf
26 | $BGZIP freebayes.vcf
27 | $BCFTOOLS index freebayes.vcf.gz
28 | 
29 | # Analysis
30 | rm -rf mummer_isec mapping_truth cortex.k31.k61.{mapping,isec}.log
31 | ./analysis.sh >& analysis.log
32 | 


--------------------------------------------------------------------------------
/results/benchmark/10diploid10X/smaller.fa:
--------------------------------------------------------------------------------
 1 | >smaller
 2 | TCATAACCCCCTGACCTATGAAACTTTGATTATCAAAGATACAAAGACCGAAGGTTATATCCGACGGGAAGCCGGAGGAG
 3 | CGTTTGCCCTATAGTTACCTCTACCGAAATGGGATCACGAAGACTTGCGCGAGAAGCACGACGAGTTTTGTCTCGTGTTT
 4 | TCATCGCAATGCAAGGGCTTTCTTCGGAGCTCTGCGTGCCTACTCTCGAAGTGCCACCTCCAGTTGGCTTCATCAGGTGT
 5 | GCTACAGGCCTGGACCAGATGGTAGTAGAGGCAAAGGACTGAGTTTGGTTCGATCTAAGCGAAACTTAGATCCTTTACGG
 6 | TGCTTGTAATCAGCTTCAGAGATATTGCCTTAACATCTTTGCCGAGGGCAGATACAGAGAAAAATGGATCTTTGAGATTA
 7 | TGATGTTACGGCAGTACGTGCGGTCTTCAGATTCTCCGACAGATATACCATATTACGCCCTACTCGCTATAATAGGCGGT
 8 | GTAAGATATACCGGAATCACAATCGGAGATGAACGAGTTACGTCGAAATGATTGTTATTATGTCCGTCCTCCACCCTAAT
 9 | GGGATTTTTACCTACCCGACTGCAAGTTGGTCGTATGCGACGTTTTCTTTTCGCATTTAGACCCACCAGGGTACATTTAC
10 | AGATAGGGAGTTCCGTCCCTTCGCCTGCTCACGAAAATGCTCAGCCGTTTAATTATGCTGTTCATACTATGCACATTTTA
11 | AGAGCCATGTTTGGAGACCACGAGATACTCTTAATTCCGGTCCCAAGTTGAATCTGCTGTGTCCTTAACACACACCCGAA
12 | GGCATCAGGCCTCGGAGGCATGTACCAGTGCCACCTACCAAAGGTCTTGAAATCACCTTAGTTCAGTACGCTTGTAAAAA
13 | CATCACTTGATTCATCTCGGAATACTCACGTCGGGCGTGCGACACAAAACGTTCCATAGGAAGCACGCCTACGATTTAGC
14 | GAGTTTGGACAGCCCTAGACTATGTTATCCATGATTCGAC
15 | 


--------------------------------------------------------------------------------
/src/global/global.c:
--------------------------------------------------------------------------------
 1 | #include "global.h"
 2 | 
 3 | #include <sys/time.h> // for seeding random
 4 | #include <unistd.h> // getpid()
 5 | 
 6 | #include "ctx_output.h" // ctx_output_init()
 7 | 
 8 | #define strhash_fast_mix(h,x) ((h) * 37 + (x))
 9 | #define rotl32(h,r) ((h)<<((r)&31)|(h)>>((32-(r))&31)) /* &31: r==0 must not shift by 32 (UB) */
10 | // Build a 32-bit seed by mixing rand(), the wall-clock time and the process id
11 | static inline uint32_t get_rand_seed()
12 | {
13 |   struct timeval now;
14 |   gettimeofday(&now, NULL);
15 |   // Rotation amounts below are (h & 31) and may be 0, which rotl32 handles
16 |   uint32_t h = rand();
17 |   h = strhash_fast_mix(h, rotl32((uint32_t)now.tv_sec,  h & 31));
18 |   h = strhash_fast_mix(h, rotl32((uint32_t)now.tv_usec, h & 31));
19 |   h = strhash_fast_mix(h, (uint32_t)getpid());
20 |   return h;
21 | }
22 | 
23 | void seed_random()
24 | {
25 |   uint32_t seed = get_rand_seed();
26 |   srand(seed); // seed for rand()
27 |   srand48(~seed); // the *rand48() generators get the bitwise complement
28 | }
29 | 
30 | void cortex_init()
31 | {
32 |   seed_random(); // seeds rand() and the *rand48() generators
33 |   // Cannot use die/warn/message/timestamp until we have completed setup
34 |   ctx_output_init();
35 |   // Now safe to use die/warn/message/timestamp methods
36 |   // since mutex and cmdcode have been set
37 | }
38 | 
39 | void cortex_destroy()
40 | {
41 |   ctx_output_destroy(); // presumably undoes ctx_output_init() from cortex_init() - confirm
42 | }
43 | 


--------------------------------------------------------------------------------
/scripts/calculations/bloom-filter-fpr.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | # Walking through 100 copies of Alu, assume ~300 kmers each
 7 | # ~1 million copies Alu in the human genome
 8 | 
 9 | my $alu_length = 300;
10 | my $n_copies = 100;
11 | 
12 | # Power in perl is **
13 | my $k = 3;
14 | my $m = 2**22 * 8; # 4MB, 33.6 million bits
15 | my $success = 1;
16 | my $num_rep_kmers = $n_copies*$alu_length;
17 | my $false_pos_rate;
18 | 
19 | for(my $i = 0; $i < $num_rep_kmers; $i++) {
20 |   $false_pos_rate = bloom_false_pos($k,$m,$i);
21 |   $success *= (1-$false_pos_rate);
22 | }
23 | 
24 | # Complete => traversing $num_rep_kmers without a false positive
25 | print "$k hash functions; $m bits;\n";
26 | print "$num_rep_kmers false positive rate: $false_pos_rate\n";
27 | print "complete success rate: $success\n";
28 | print "complete failure rate: ".(1-$success)."\n";
29 | 
30 | # k is the number of hash functions
31 | # m is the number of total bits
32 | # n is the number of elements already inserted (not the number of bits set)
33 | sub bloom_false_pos
34 | {
35 |   my ($k,$m,$n) = @_;
36 |   return (1 - exp(-$k * $n / $m))**$k; # standard estimate (1 - e^(-kn/m))^k
37 | }
38 | 
39 | 


--------------------------------------------------------------------------------
/src/basic/hash.h:
--------------------------------------------------------------------------------
 1 | #ifndef HASH_H_
 2 | #define HASH_H_
 3 | 
 4 | // Hash functions
 5 | #if defined(USE_CITY_HASH)
 6 |   // Use Google's CityHash
 7 |   #include "misc/city.h"
 8 |   #define HASH_NAME_STR "CityHash32"
 9 |   #define ctx_hash32(src,n,rehash) ((uint32_t)CityHash64WithSeed((char*)(src), (n), (rehash)))
10 |   #define ctx_hash64(src,n,rehash) CityHash64WithSeed((char*)(src), (n), (rehash))
11 | #elif defined(USE_XXHASH)
12 |   // Use xxHash
13 |   #include "xxHash/xxhash.h"
14 |   #define HASH_NAME_STR "xxHash32"
15 |   #define ctx_hash32(src,n,rehash) XXH32((src), (n), (rehash))
16 |   #define ctx_hash64(src,n,rehash) XXH64((src), (n), (rehash))
17 | #else
18 |   // Use Bob Jenkins' lookup3
19 |   #include "misc/lookup3.h"
20 |   #define HASH_NAME_STR "Lookup3"
21 |   #define ctx_hash32(src,n,rehash) lk3_hashlittle((src), (n), (rehash))
22 | 
23 | static inline uint64_t ctx_hash64(void *ptr, size_t n, uint64_t init)
24 | {
25 |   uint32_t pc = (uint32_t)(init>>32), pb = (uint32_t)init; // seed halves: high, low
26 |   lk3_hashlittle2(ptr, n, &pc, &pb); // note: `pc` slightly better mixed than `pb`
27 |   return (uint64_t)pc | ((uint64_t)pb<<32);
28 | }
29 | 
30 | #endif
31 | 
32 | #endif /* HASH_H_ */
33 | 


--------------------------------------------------------------------------------
/tests/pjoin/pjoin1/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Sanity check that merging matching link files gives correct counts
 3 | #
 4 | 
 5 | SHELL:=/bin/bash -euo pipefail
 6 | 
 7 | K=7
 8 | CTXDIR=../../..
 9 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1])
10 | CTXPIPELINE=$(CTXDIR)/scripts/make-pipeline.pl
11 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
12 | 
13 | REFLEN=5000
14 | 
15 | TGTS=genome0.fa genome0.k$(K).ctx genome0.k$(K).ctp.gz \
16 |      genome1.fa genome1.k$(K).ctx genome1.k$(K).ctp.gz \
17 |      joint.k$(K).ctp.gz
18 | 
19 | all: joint.k$(K).ctp.gz
20 | 
21 | genome%.fa:
22 | 	$(DNACAT) -n $(REFLEN) -M <(echo ref) -F > $@
23 | 
24 | genome%.k$(K).ctx: genome%.fa
25 | 	$(MCCORTEX) build -q -k $(K) --sample Genome0 -1 $< $@
26 | 
27 | genome%.k$(K).ctp.gz: genome%.k$(K).ctx genome%.fa
28 | 	$(MCCORTEX) thread -q -o $@ -1 genome$*.fa genome$*.k$(K).ctx
29 | 
30 | joint.k$(K).ctp.gz: genome0.k$(K).ctp.gz genome1.k$(K).ctp.gz
31 | 	$(MCCORTEX) pjoin -q -n 1M -o $@ genome0.k$(K).ctp.gz genome0.k$(K).ctp.gz genome1.k$(K).ctp.gz genome0.k$(K).ctp.gz genome1.k$(K).ctp.gz
32 | 
33 | clean:
34 | 	rm -rf $(TGTS)
35 | 
36 | .PHONY: all clean
37 | 


--------------------------------------------------------------------------------
/tests/graphviz/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | K=15
 4 | CTXDIR=../..
 5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl
 7 | 
 8 | all: seq.k$(K).ctx seq.k$(K).unitigs.dot seq.k$(K).kmers.dot
 9 | 
10 | plots: seq.k$(K).unitigs.pdf seq.k$(K).kmers.pdf
11 | 
12 | seq.fa:
13 | 	echo TACATTCCCCCATAGTCGTAGGCGTTAAATACA     > seq.fa
14 | 	echo TACATTCCCCCATAGTAGTAGGCGTTAAATACA    >> seq.fa
15 | 	echo GGCGTTCGCTTATCCGGATAAGCGAACGCC       >> seq.fa
16 | 	echo ATATATATATATATATATATATATATATATATATAT >> seq.fa
17 | 	echo ACTTCTTCGAAAAAAAAAAAAAAATACTGGCCCTAACTTCTTCGAAAAAA >> seq.fa
18 | 
19 | seq.k$(K).ctx: seq.fa
20 | 	$(MCCORTEX) build -q -k $(K) --sample MasterSeq --seq seq.fa seq.k$(K).ctx
21 | 
22 | seq.k$(K).unitigs.dot: seq.k$(K).ctx
23 | 	$(MCCORTEX) unitigs -q --dot $< > $@
24 | 
25 | seq.k$(K).kmers.dot: seq.k$(K).ctx
26 | 	$(CTX2DOT) $< > $@
27 | 
28 | %.pdf: %.dot
29 | 	cat $< | dot -Tpdf > $@
30 | 
31 | clean:
32 | 	rm -rf seq.fa seq.k$(K).ctx
33 | 	rm -rf seq.k$(K).unitigs.dot seq.k$(K).kmers.dot
34 | 	rm -rf seq.k$(K).unitigs.pdf seq.k$(K).kmers.pdf
35 | 
36 | .PHONY: all clean plots
37 | 


--------------------------------------------------------------------------------
/tests/pjoin/pjoin0/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | K=9
 4 | CTXDIR=../../..
 5 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1])
 6 | 
 7 | LINKS=paths.0.k$(K).ctp.gz paths.1.k$(K).ctp.gz
 8 | SEQ=genome.0.fa genome.1.fa
 9 | GRAPHS=$(SEQ:.fa=.k$(K).ctx)
10 | MERGED=genomes.k$(K).ctx genomes.k$(K).ctp.gz
11 | 
12 | TGTS=$(SEQ) $(GRAPHS) $(LINKS) $(MERGED)
13 | 
14 | # non-default target: genome.k$(K).pdf
15 | 
16 | all: $(TGTS)
17 | 
18 | clean:
19 | 	rm -rf $(TGTS)
20 | 
21 | genome.0.fa:
22 | 	echo TGGTGTCGCCTACA > $@
23 | 	echo TtGTGTCGCCTAgA >> $@
24 | 
25 | genome.1.fa:
26 | 	echo TtGTGTCGCCTACA > $@
27 | 	echo TGGTGTCGCCTAgA >> $@
28 | 
29 | genome.%.k$(K).ctx: genome.%.fa
30 | 	$(MCCORTEX) build -q -m 1M -k $(K) --sample Gnome$* --seq genome.$*.fa $@
31 | 
32 | paths.%.k$(K).ctp.gz: genome.%.k$(K).ctx genome.%.fa
33 | 	$(MCCORTEX) thread -q -m 1M --seq genome.$*.fa -o $@ genome.$*.k$(K).ctx
34 | 	gunzip -c $@
35 | 
36 | genomes.k$(K).ctx: $(LINKS)
37 | 	$(MCCORTEX) join -q -o $@ $(GRAPHS)
38 | 
39 | genomes.k$(K).ctp.gz: $(LINKS)
40 | 	$(MCCORTEX) pjoin -q -o $@ $(LINKS)
41 | 	gunzip -c $@
42 | 
43 | .PHONY: all plots clean
44 | 


--------------------------------------------------------------------------------
/libs/cJSON/LICENSE:
--------------------------------------------------------------------------------
 1 |   Copyright (c) 2009 Dave Gamble
 2 |  
 3 |   Permission is hereby granted, free of charge, to any person obtaining a copy
 4 |   of this software and associated documentation files (the "Software"), to deal
 5 |   in the Software without restriction, including without limitation the rights
 6 |   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 |   copies of the Software, and to permit persons to whom the Software is
 8 |   furnished to do so, subject to the following conditions:
 9 |  
10 |   The above copyright notice and this permission notice shall be included in
11 |   all copies or substantial portions of the Software.
12 |  
13 |   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 |   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 |   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 |   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 |   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 |   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 |   THE SOFTWARE.
20 | 
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014  Isaac Turner <turner.isaac@gmail.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/tests/threading/threading1/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | K=31
 4 | CTXDIR=../../..
 5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl
 7 | 
 8 | GRAPHS=ref.k$(K).ctx ref.ctp.gz
 9 | LOGS=$(addsuffix .log,$(GRAPHS))
10 | PLOTS=ref.pdf ref.dot
11 | TGTS=ref.fa $(GRAPHS)
12 | 
13 | all: $(TGTS)
14 | 
15 | plots: ref.dot ref.pdf
16 | 
17 | ref.fa:
18 | 	echo CGATTGAATTCCACCGATAATGCAGATGTGAGCCTCAGCATCTACTGCTTCCTCGTCGTCGGGGACTTTTGTTGACCTACCACATGATACATGCGGCCAT >  $@
19 | 	echo ACAAGCTAAAGAAGCTAGCCAGTGCAGGCTCCCTTCAGCATCTACTGCTTCCTCGTCGTCGGGGACTAGAAACGTGACCATCGGCCACCGAAAGATAAGG >> $@
20 | 	echo ACAAGCTAAAGAAGCTAGCCAGTGCAGGCTCCCTTCAGCATCTACTGCTGGCTCGTCGTCGGGGACTAGAAACGTGACCATCGGCCACCGAAAGATAAGG >> $@
21 | 
22 | ref.k$(K).ctx: ref.fa
23 | 	$(MCCORTEX) build -m 1M -k $(K) --sample MsSample --seq ref.fa ref.k$(K).ctx >& $@.log
24 | 
25 | ref.ctp.gz: ref.k$(K).ctx
26 | 	$(MCCORTEX) thread -m 1M --seq ref.fa -o $@ $< >& $@.log
27 | 
28 | ref.dot: ref.k$(K).ctx  # Graphviz dot of the graph (CTX2GV was never defined; the script var is CTX2DOT)
29 | 	$(CTX2DOT) --simplify $< > $@
30 | 
31 | ref.pdf: ref.dot
32 | 	dot -Tpdf $< > $@
33 | 
34 | clean:
35 | 	rm -rf $(TGTS) $(PLOTS) $(LOGS)
36 | 
37 | .PHONY: all plots clean
38 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls4/truth.cov.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20151014
 4 | ##reference=ref/ref.fa
 5 | ##contig=<ID=ref,length=200>
 6 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 7 | ##FORMAT=<ID=K21R,Number=A,Type=Integer,Description="Coverage on ref (k=21) => sum(kmer_covs)/exp_num_kmers">
 8 | ##FORMAT=<ID=K21A,Number=A,Type=Integer,Description="Coverage on alt (k=21) => sum(kmer_covs)/exp_num_kmers">
 9 | ##mccortex_5a56358=<prev="NULL",cmd="../../../bin/mccortex31 vcfcov -m 10M -o calls.cov.vcf -r ../ref/ref.fa --max-nvars 1 calls.vcf pluto.k21.ctx",cwd="/Users/isaac/mccortex/tests/vcfcov/calls4",datetime="20151028-00:20:03",version=v0.0.3-368-gc5c8b27-dirty>
10 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	pluto
11 | ref	7	.	G	T	.	PASS	.	K21R:K21A	2:3
12 | ref	28	.	A	C	.	PASS	.	K21R:K21A	2:3
13 | ref	49	.	GG	CGAT	.	PASS	.	K21R:K21A	2:3
14 | ref	71	.	A	GAT	.	PASS	.	K21R:K21A	2:3
15 | ref	92	.	GCACCAGGG	AC	.	PASS	.	K21R:K21A	2:3
16 | ref	121	.	C	G	.	PASS	.	K21R:K21A	2:3
17 | ref	142	.	GGCACAGCA	TTACTCTTC	.	PASS	.	K21R:K21A	2:3
18 | ref	171	.	A	CTAG	.	PASS	.	K21R:K21A	2:3
19 | ref	192	.	CATCATAG	A	.	PASS	.	K21R:K21A	2:3
20 | 


--------------------------------------------------------------------------------
/scripts/make-isec.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Exit immediately if a command exits with a non-zero status.
 4 | set -euo pipefail
 5 | 
 6 | if [[ $# -ne 3 ]]
 7 | then
 8 |   echo "usage: $0 <tmpdir> <truth.vcf.gz> <results.vcf.gz>" 1>&2
 9 |   echo "  Create tmp dir and use it to count intersection of indexed VCFs" 1>&2
10 |   exit -1
11 | fi
12 | 
13 | TMPDIR="$1"
14 | TRUTHVCF="$2"
15 | RESULTSVCF="$3"
16 | 
17 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
18 | CTXDIR="$DIR/.."
19 | 
20 | BCFTOOLS=$CTXDIR"/libs/bcftools/bcftools"
21 | VCFALLELES=$CTXDIR"/libs/biogrok/vcf-count-alleles"
22 | 
23 | "$BCFTOOLS" isec "$TRUTHVCF" "$RESULTSVCF" -p "$TMPDIR"  # quoted: paths may contain spaces
24 | 
25 | MISSED=$("$VCFALLELES" "$TMPDIR/0000.vcf")   # alleles private to the truth VCF
26 | FP=$("$VCFALLELES" "$TMPDIR/0001.vcf")       # alleles private to the results VCF
27 | FOUND=$("$VCFALLELES" "$TMPDIR/0002.vcf")    # alleles shared by both VCFs
28 | NCALLED=$("$VCFALLELES" "$RESULTSVCF")
29 | NTRUTH=$("$VCFALLELES" "$TRUTHVCF")
30 | 
31 | awk -v n="$MISSED" -v d="$NTRUTH"  'BEGIN{printf("Missed: %4d / %4d (%5.2f%%)\n",n,d,(d?100*n/d:0))}'  # d==0 => 0%, not an awk abort
32 | awk -v n="$FP"     -v d="$NCALLED" 'BEGIN{printf("FP:     %4d / %4d (%5.2f%%)\n",n,d,(d?100*n/d:0))}'
33 | awk -v n="$FOUND"  -v d="$NTRUTH"  'BEGIN{printf("Found:  %4d / %4d (%5.2f%%)\n",n,d,(d?100*n/d:0))}'
34 | 
35 | echo "remember to delete temp dir: $TMPDIR" 1>&2
36 | 


--------------------------------------------------------------------------------
/src/tools/vcf_coverage.h:
--------------------------------------------------------------------------------
 1 | #ifndef VCF_COVERAGE_H_
 2 | #define VCF_COVERAGE_H_
 3 | 
 4 | #include "db_graph.h"
 5 | 
 6 | #include "htslib/vcf.h"
 7 | #include "htslib/faidx.h"
 8 | 
 9 | #define DEFAULT_MAX_ALLELE_LEN 100
10 | #define DEFAULT_MAX_GT_VARS 8
11 | 
12 | typedef struct {
13 |   // Stats
14 |   uint64_t nvcf_lines, nalts_read, nalts_loaded;
15 |   uint64_t nalts_too_long, nalts_no_covg, nalts_with_covg;
16 |   uint64_t ngt_kmers;
17 | } VcfCovStats;
18 | 
19 | typedef struct {
20 |   const char *kcov_ref_tag, *kcov_alt_tag;
21 |   // Don't attempt to genotype alleles bigger than this
22 |   // defaults to DEFAULT_MAX_ALLELE_LEN
23 |   uint32_t max_allele_len;
24 |   // 2^8 = 256 possible haplotypes
25 |   // defaults to DEFAULT_MAX_GT_VARS
26 |   uint32_t max_gt_vars;
27 |   bool load_kmers_only;
28 | } VcfCovPrefs;
29 | 
30 | void vcfcov_file(htsFile *vcffh, bcf_hdr_t *vcfhdr,
31 |                  htsFile *outfh, bcf_hdr_t *outhdr,
32 |                  const char *path, faidx_t *fai,
33 |                  const size_t *samplehdrids,
34 |                  const VcfCovPrefs *prefs,
35 |                  VcfCovStats *stats,
36 |                  dBGraph *db_graph);
37 | 
38 | #endif /* VCF_COVERAGE_H_ */
39 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls0/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | #
 4 | # Test vcfcov with an empty VCF
 5 | # Works with any kmer size (K)
 6 | #
 7 | 
 8 | K=21
 9 | CTXDIR=../../..
10 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
11 | VCFNALLELES=$(CTXDIR)/libs/biogrok/vcf-count-alleles
12 | 
13 | REF=../ref/ref.fa
14 | 
15 | VCFS=calls.cov.k$(K).vcf lowmem.cov.k$(K).vcf
16 | GRAPH=graph.k$(K).ctx
17 | LOGS=$(VCFS:=.log) $(GRAPH:=.log)
18 | 
19 | all: test
20 | 
21 | clean:
22 | 	rm -rf $(VCFS) $(LOGS) $(GRAPH)
23 | 
24 | calls.cov.k$(K).vcf: $(REF) calls.vcf $(GRAPH)
25 | 	$(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --high-mem calls.vcf $(GRAPH) >& $@.log
26 | 
27 | lowmem.cov.k$(K).vcf: $(REF) calls.vcf $(GRAPH)  # vcfcov run with --low-mem; graph named via $(GRAPH) like the prerequisites
28 | 	$(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --low-mem calls.vcf $(GRAPH) >& $@.log
29 | 
30 | graph.k$(K).ctx:
31 | 	$(MCCORTEX) build -k $(K) \
32 | 	  --sample John --seq <(echo '') \
33 | 	  --sample Jane --seq <(echo '') \
34 | 	  $@ >& $@.log
35 | 
36 | test: $(VCFS)
37 | 	[[ `$(VCFNALLELES) calls.cov.k$(K).vcf` -eq 0 ]]
38 | 	[[ `$(VCFNALLELES) lowmem.cov.k$(K).vcf` -eq 0 ]]
39 | 	@echo "=> Empty VCF works."
40 | 
41 | view: calls.cov.k$(K).vcf
42 | 	gzip -fcd $<
43 | 
44 | .PHONY: all clean view test
45 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Travis CI configuration
 2 | # Note: there are currently issues with limiting coverity to a single compiler
 3 | #       see https://github.com/travis-ci/travis-ci/issues/1975
 4 | #       Instead we use a script to decide if we should do anything
 5 | 
 6 | dist: trusty
 7 | language: c
 8 | compiler:
 9 | - clang
10 | - gcc
11 | os:
12 | - linux
13 | - osx
14 | 
15 | install:
16 |   - ./travis/install.sh
17 |   - eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"
18 | 
19 | script: ./travis/script.sh
20 | 
21 | env:
22 |   global:
23 |     # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created
24 |     #   via the "travis encrypt" command using the project repo's public key
25 |     - secure: "T08ccfH7+agMchVPhAP/MTdVeFonkjlLvbY8nv/jQ5aZeeWP2i1Oop59MVtuaA4Vw1Ickjr1czLsHGob2OyMHwo2otEasMqacvOw38exblvjOptqN7dx2yu0qzBZdHxLd/uOad7HMfgtkVSwgpodeNR6+K+4LcCM7J6+iAWtLME="
26 | 
27 | addons:
28 |   coverity_scan:
29 |     project:
30 |       name: "mcveanlab/mccortex"
31 |       description: Build submitted via Travis CI
32 |     notification_email: turner.isaac@gmail.com
33 |     build_command_prepend: git submodule update --init --recursive
34 |     build_command: make
35 |     branch_pattern: coverity_scan
36 | 


--------------------------------------------------------------------------------
/tests/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -euo pipefail
 4 | 
 5 | #
 6 | # Run all the tests!
 7 | #   Isaac Turner
 8 | #   2014-07-16
 9 | #
10 | # cd into each directory and run `make`
11 | #
12 | 
13 | if [[ ( $# -gt 1 ) || ( $# -eq 1 && $1 != 'noupdate' && $1 != 'update' ) ]]
14 | then
15 |   echo "./run [update|noupdate]"
16 |   exit -1
17 | fi
18 | 
19 | cwd=`pwd`
20 | echo $cwd
21 | 
22 | if [[ $# -eq 0 || $1 == 'update' ]]
23 | then
24 |   # Get all dependencies used in testing (bioinf-perl, bcftools, samtools etc.)
25 |   cd ../libs && make all && cd $cwd
26 |   if [ $? -ne 0 ]; then exit -1; fi
27 | fi
28 | 
29 | # Run cortex unit tests
30 | cd ..
31 | for k in 31 63 95 127
32 | do
33 |   make test MAXK=$k STRICT=1
34 | done
35 | cd $cwd
36 | 
37 | # Get list of current tests (all directories except 'old')
38 | dirs=`ls | grep -v '.*run.sh' | grep -v '^\.' | grep -v old`
39 | echo $dirs
40 | 
41 | cd .. && make MAXK=31 RELEASE=1 && make MAXK=63 && cd $cwd
42 | if [ $? -ne 0 ]; then exit -1; fi
43 | 
44 | for f in $dirs
45 | do
46 |   echo && echo ===== && echo "Test: $cwd/$f"
47 |   cd $f && make clean && make all && cd ..
48 |   if [ $? -ne 0 ]; then exit -1; fi
49 | done
50 | 
51 | echo $dirs
52 | echo All tests completed.
53 | 


--------------------------------------------------------------------------------
/scripts/perl/bubbles-example.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use File::Basename;
 7 | 
 8 | # Use current directory to find modules
 9 | use FindBin;
10 | use lib $FindBin::Bin;
11 | 
12 | use McCortexBubbles;
13 | 
14 | sub print_usage
15 | {
16 |   for my $err (@_) { print STDERR "Error: $err\n"; }
17 |   
18 |   print STDERR "" .
19 | "Usage: $0 <bub.gz>\n";
20 | 
21 |   exit(-1);
22 | }
23 | 
24 | if(@ARGV > 1) { print_usage(); }
25 | my ($file) = (@ARGV, "-");
26 | my $fh;
27 | open($fh, "gzip -fcd $file |") or die("Cannot read file $file: $!");
28 | 
29 | my $cb = new McCortexBubbles($fh);
30 | my ($seq5p, $seq3p, $branches, $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid);
31 | 
32 | while(1)
33 | {
34 |   ($seq5p, $seq3p, $branches,
35 |    $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid) = $cb->next();
36 |   if(!defined($seq5p)) { last; } # undefined 5' flank: stop reading
37 | 
38 |   print "BUBBLE $callid\n";
39 |   print ">flank5p $flank5p_nkmers nkmers=$flank5p_nkmers\n$seq5p\n";
40 |   print ">flank3p $flank3p_nkmers nkmers=$flank3p_nkmers\n$seq3p\n"; # 3' header uses the 3' kmer count
41 |   print "". join('', map {">branch$_ nkmers=$branchlens->[$_]\n$branches->[$_]\n"} 0..(@$branches-1));
42 |   print "\n";
43 | }
44 | 
45 | close($fh);
46 | 


--------------------------------------------------------------------------------
/src/paths/gpath_follow.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPATH_FOLLOW_H_
 2 | #define GPATH_FOLLOW_H_
 3 | 
 4 | #include "dna.h"
 5 | #include "gpath.h"
 6 | 
 7 | /*
 8 | 
 9 | Link 'age' vs 'pos'
10 | 
11 |    \  \    /      /
12 |  ___\__\__/___X__/_
13 |  a ^-------------->
14 |     b ^----------->
15 | 
16 | Links start at (a) and at (b). At (X) both links have pos=1, but link (a) has
17 | age=2 and (b) has age=1.
18 | 
19 | */
20 | 
21 | // This struct is packed so we can hash it quickly
22 | struct GPathFollowStruct
23 | {
24 |   const GPath *gpath;
25 |   uint16_t pos, len;
26 |   uint32_t age; // age is >= pos
27 |   // A small buffer of upcoming 24 bases
28 |   // uint16_t first_cached; // first base in buffer (multiple of 4: 0,4,8,...)
29 |   // uint8_t cache[6]; // first..first+23 (24 bases)
30 | } __attribute__((packed));
31 | 
32 | typedef struct GPathFollowStruct GPathFollow;
33 | 
34 | #include "madcrowlib/madcrow_buffer.h"
35 | madcrow_buffer(gpath_follow_buf,GPathFollowBuffer,GPathFollow);
36 | 
37 | #define gpath_follow_get_base(path,pos) (binary_seq_get((path)->gpath->seq,pos))
38 | // Nucleotide gpath_follow_get_base(GPathFollow *path, size_t pos);
39 | GPathFollow gpath_follow_create(const GPath *gpath);
40 | 
41 | #endif /* GPATH_FOLLOW_H_ */
42 | 


--------------------------------------------------------------------------------
/src/basic/decomp_bubble.h:
--------------------------------------------------------------------------------
 1 | #ifndef DECOMP_BUBBLE_H_
 2 | #define DECOMP_BUBBLE_H_
 3 | 
 4 | #include "aligned_call.h"
 5 | #include "call_file_reader.h"
 6 | #include "seq_reader.h" // genome hash
 7 | 
 8 | typedef struct {
 9 |   uint64_t nflank5p_unmapped, nflank5p_lowqual;
10 |   uint64_t nflank3p_multihits, nflank3p_not_found;
11 |   uint64_t nflank3p_exact_found, nflank3p_approx_found;
12 |   uint64_t nflanks_overlap_too_much;
13 |   uint64_t ncalls, ncalls_mapped;
14 | } DecompBubbleStats;
15 | 
16 | typedef struct DecompBubbleStruct DecompBubble;
17 | 
18 | DecompBubble* decomp_bubble_init();
19 | void decomp_bubble_destroy(DecompBubble *db);
20 | 
21 | void decomp_bubble_cpy_stats(DecompBubbleStats *stats, const DecompBubble *db);
22 | scoring_t* decomp_bubble_get_scoring(DecompBubble *db);
23 | 
24 | // Convert a call into an aligned call
25 | // return 0 on success, otherwise non-zero on failure
26 | int decomp_bubble_call(DecompBubble *db, ChromHash *genome,
27 |                        size_t kmer_size, size_t min_mapq,
28 |                        const CallFileEntry *centry,
29 |                        const bam1_t *mflank, const bam_hdr_t *bhdr,
30 |                        AlignedCall *ac);
31 | 
32 | #endif /* DECOMP_BUBBLE_H_ */
33 | 


--------------------------------------------------------------------------------
/src/tools/generate_paths.h:
--------------------------------------------------------------------------------
 1 | #ifndef GENERATE_PATHS_H_
 2 | #define GENERATE_PATHS_H_
 3 | 
 4 | #include "seq_file/seq_file.h"
 5 | 
 6 | #include "cortex_types.h"
 7 | #include "db_graph.h"
 8 | #include "seq_loading_stats.h"
 9 | #include "correct_aln_input.h"
10 | 
11 | typedef struct GenPathWorker GenPathWorker;
12 | 
13 | // Estimate memory required per worker thread
14 | size_t gen_paths_worker_est_mem(const dBGraph *db_graph);
15 | 
16 | GenPathWorker* gen_paths_workers_alloc(size_t n, dBGraph *graph);
17 | 
18 | void gen_paths_workers_dealloc(GenPathWorker *mem, size_t n);
19 | 
20 | // Add a single contig using a given worker
21 | void gen_paths_worker_seq(GenPathWorker *wrkr, AsyncIOData *data,
22 |                           const CorrectAlnInput *task);
23 | 
24 | // For testing
25 | void gen_paths_from_str_mt(GenPathWorker *gen_path_wrkr, char *seq,
26 |                            CorrectAlnParam params);
27 | 
28 | // workers array must be at least as long as tasks
29 | void generate_paths(CorrectAlnInput *tasks, size_t num_tasks,
30 |                     GenPathWorker *workers, size_t num_workers);
31 | 
32 | CorrectAlnStats* gen_paths_get_aln_stats(GenPathWorker *wrkr);
33 | SeqLoadingStats* gen_paths_get_stats(GenPathWorker *wrkr);
34 | 
35 | #endif /* GENERATE_PATHS_H_ */
36 | 


--------------------------------------------------------------------------------
/tests/clean_graph/clean2/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | K=17
 4 | CTXDIR=../../..
 5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl
 7 | 
 8 | GRAPHS=seq.k$(K).raw.ctx seq.k$(K).clean.ctx
 9 | DOTS=$(GRAPHS:.ctx=.dot) $(GRAPHS:.ctx=.unitigs.dot)
10 | PLOTS=$(DOTS:.dot=.pdf)
11 | 
12 | all: seq.fa $(GRAPHS) $(DOTS)
13 | 
14 | plots: $(PLOTS)
15 | 
16 | seq.fa: Makefile
17 | 	# Repeat of CAAAGGCCTCACGGGTA
18 | 	echo GTGAGGCCAAGCAAAGGCCTCACGGGTACAAAGGCCTCACGGGTAGAATCCCCTTTG > seq.fa
19 | 	echo GTGAGGCCAAGCAAAGGCCTCACGGGTAGAATCCCCTTTG >> seq.fa
20 | 	echo GTGAGGCCAAGCAAAGGCCTCACGGGTAGAATCCCCTTTG >> seq.fa
21 | 	echo AAAAAAAAAAAAAAAAATAAAAAAAAAAAAAAAAA >> seq.fa
22 | 
23 | seq.k$(K).raw.ctx: seq.fa
24 | 	$(MCCORTEX) build -q -m 10M -k $(K) --sample SeqJr --seq $< $@
25 | 	$(MCCORTEX) check -q $@
26 | 
27 | seq.k$(K).clean.ctx: seq.k$(K).raw.ctx
28 | 	$(MCCORTEX) clean -q --unitigs=2 -o $@ $<
29 | 	$(MCCORTEX) check -q $@
30 | 
31 | %.unitigs.dot: %.ctx
32 | 	$(MCCORTEX) unitigs -q -m 1M --dot $< > $@
33 | 	# $(CTX2DOT) --simplify $< > $@
34 | 
35 | %.dot: %.ctx
36 | 	$(CTX2DOT) $< > $@
37 | 
38 | %.pdf: %.dot
39 | 	dot -Tpdf $< > $@
40 | 
41 | clean:
42 | 	rm -rf seq.fa $(GRAPHS) $(DOTS) $(PLOTS)
43 | 
44 | .PHONY: all plots clean
45 | 


--------------------------------------------------------------------------------
/tests/breakpoint/breakpoint0/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Sanity check that calling breakpoints with only a ref and reads from the ref
 3 | # gives no calls
 4 | #
 5 | 
 6 | SHELL:=/bin/bash -euo pipefail
 7 | 
 8 | CTXDIR=../../..
 9 | CTXPIPELINE=$(CTXDIR)/scripts/make-pipeline.pl
10 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
11 | READSIM=$(CTXDIR)/libs/readsim/readsim
12 | VCFCOUNT=$(CTXDIR)/libs/biogrok/vcf-count
13 | 
14 | REFLEN=1000
15 | K=31
16 | SEQDEPTH=30
17 | READLEN=100
18 | OUTDIR=proj
19 | 
20 | all: run
21 | 
22 | ref.fa:
23 | 	$(DNACAT) -n $(REFLEN) -M <(echo ref) -F > $@
24 | 
25 | reads/reads.fa.gz: ref.fa
26 | 	mkdir -p reads
27 | 	$(READSIM) -r ref.fa -l $(READLEN) -s -d $(SEQDEPTH) reads/reads
28 | 
29 | task.k$(K).mk:
30 | 	echo "RefReads reads/reads.fa.gz" | $(CTXPIPELINE) -r ref.fa $(K) proj - > $@
31 | 
32 | run: task.k$(K).mk reads/reads.fa.gz ref.fa
33 | 	$(MAKE) -f $< CTXDIR=$(CTXDIR) breakpoints-vcf
34 | 	@# Check no VCF entries
35 | 	(( `$(VCFCOUNT) proj/vcfs/breakpoints.joint.links.k$(K).vcf.gz` == 0 )) || false
36 | 	@# Check no breakpoint call entries (decompress first: grep cannot match text in the raw .gz)
37 | 	(( `gzip -fcd proj/k$(K)/breakpoints_links/joint.brk.gz | grep -c '>brkpnt'` == 0 )) || false
38 | 	@echo 'Success: no breakpoint calls or VCF entries!'
39 | 
40 | clean:
41 | 	rm -rf ref.fa* reads proj task.k$(K).mk
42 | 
43 | .PHONY: all run clean
44 | 


--------------------------------------------------------------------------------
/tests/pop_bubbles/pop_bubbles1/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Test pop bubbles with a single sample input file
 4 | 
 5 | K=21
 6 | CTXDIR=../../..
 7 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1])
 8 | 
 9 | SEQS=seq.fa truth.fa
10 | GRAPHS=seq.ctx popped.ctx truth.ctx
11 | 
12 | all: popped.ctx truth.ctx check
13 | 
14 | seq.fa:
15 | 	( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \
16 | 		echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \
17 | 	  echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAcATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@
18 | 
19 | truth.fa:
20 | 	( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \
21 | 		echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \
22 | 		echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGA; echo ATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@
23 | 
24 | %.ctx: %.fa
25 | 	$(MCCORTEX) build -q -k $(K) --sample $* --seq $< $@
26 | 
27 | popped.ctx: seq.ctx
28 | 	$(MCCORTEX) popbubbles -q --out $@ $<
29 | 
30 | check: truth.ctx popped.ctx
31 | 	diff -q <($(MCCORTEX) view -qk popped.ctx | sort) <($(MCCORTEX) view -qk truth.ctx | sort) && \
32 | 	echo "Kmers match."
33 | 
34 | clean:
35 | 	rm -rf $(SEQS) $(GRAPHS)
36 | 
37 | .PHONY: all clean check
38 | 


--------------------------------------------------------------------------------
/scripts/perl/bubbles-to-contigs.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use File::Basename;
 7 | 
 8 | # Use current directory to find modules
 9 | use FindBin;
10 | use lib $FindBin::Bin;
11 | 
12 | use McCortexBubbles;
13 | 
14 | sub print_usage
15 | {
16 |   for my $err (@_) { print STDERR "Error: $err\n"; }
17 | 
18 |   print STDERR "" .
19 | "Usage: $0 <bub.txt.gz>\n" .
20 | "  Print a contig for each bubble branch. Prints to STDOUT.\n" .
21 | "  Contigs are named: >BUBBLENAME.branchB\n" .
22 | "  where B is the branch number\n";
23 | 
24 |   exit(-1);
25 | }
26 | 
27 | if(@ARGV != 1) { print_usage(); }
28 | my ($file) = (@ARGV,"-");
29 | my $fh;
30 | open($fh, "gzip -fcd $file |") or die("Cannot read file $file: $!");
31 | 
32 | my $cb = new McCortexBubbles($fh);
33 | my ($seq5p, $seq3p, $branches, $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid);
34 | 
35 | while(1)
36 | {
37 |   ($seq5p, $seq3p, $branches,
38 |    $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid) = $cb->next();
39 |   if(!defined($seq5p)) { last; }
40 | 
41 |   my ($len5p,$len3p) = (length($seq5p), length($seq3p));
42 | 
43 |   for(my $i = 0; $i < @$branches; $i++) {
44 |     print ">$callid.branch$i:$len5p:$len3p\n";
45 |     print $seq5p.$branches->[$i].$seq3p."\n";
46 |   }
47 | }
48 | 
49 | close($fh);
50 | 


--------------------------------------------------------------------------------
/travis/provision-vm.sh:
--------------------------------------------------------------------------------
 1 | sudo apt-get update
 2 | sudo apt-get install -y g++ libncurses5-dev python-dev python3-dev emacs cmake autoconf
 3 | 
 4 | # Stampy
 5 | cd
 6 | curl -O http://www.well.ox.ac.uk/~gerton/software/Stampy/stampy-latest.tgz
 7 | tar xfz stampy-latest.tgz
 8 | cd stampy
 9 | make
10 | 
11 | # VCFTools
12 | cd
13 | wget https://downloads.sourceforge.net/project/vcftools/vcftools_0.1.13.tar.gz
14 | tar xfz vcftools_0.1.13.tar.gz
15 | cd vcftools_0.1.13
16 | make
17 | 
18 | # Cortex
19 | cd
20 | git clone --recursive https://github.com/iqbal-lab/cortex.git
21 | cd cortex
22 | bash install.sh
23 | for k in 31 63 95 127; do
24 |   for ncol in 1 2 3 9 10 11; do
25 |     make cortex_var MAXK=$k NCOLS=$ncol
26 |   done
27 | done
28 | echo 'export PERL5LIB="${HOME}/cortex/scripts/analyse_variants/bioinf-perl/lib/:${HOME}/cortex/scripts/calling/:${PERL5LIB}"' >> .profile
29 | echo 'export PATH="${HOME}/cortex/scripts/analyse_variants/needleman_wunsch/:${PATH}"' >> .profile
30 | 
31 | # McCortex
32 | cd
33 | git clone --recursive -b develop https://github.com/mcveanlab/mccortex.git
34 | cd mccortex
35 | cd libs && make all && cd ..
36 | for k in 31 63 95 127; do  # build and test once per listed MAXK value
37 |   make all test MAXK=$k
38 | done
39 | 
40 | # Freebayes
41 | cd
42 | git clone --recursive https://github.com/ekg/freebayes.git
43 | cd freebayes
44 | make
45 | 


--------------------------------------------------------------------------------
/scripts/perl/breakpoints-example.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use File::Basename;
 7 | 
 8 | # Use current directory to find modules
 9 | use FindBin;
10 | use lib $FindBin::Bin;
11 | 
12 | use McCortexBreakpoints;
13 | 
14 | sub print_usage
15 | {
16 |   for my $err (@_) { print STDERR "Error: $err\n"; }
17 |   
18 |   print STDERR "" .
19 | "Usage: $0 <brk.gz>\n";
20 | 
21 |   exit(-1);
22 | }
23 | 
24 | if(@ARGV > 1) { print_usage(); }
25 | my ($file) = (@ARGV, "-");
26 | my $fh;
27 | open($fh, "gzip -fcd $file |") or die("Cannot read file $file: $!");
28 | 
29 | my $cb = new McCortexBreakpoints($fh,$file);
30 | my ($seq5p, $seq3p, $pathseq, $flank5p_refs, $flank3p_refs, $cols, $callid);
31 | 
32 | while(1)
33 | {
34 |   ($seq5p, $seq3p, $pathseq, $flank5p_refs, $flank3p_refs, $cols, $callid) = $cb->next();
35 |   if(!defined($seq5p)) { last; }
36 | 
37 |   my @strs5p = map {$_->{'chrom'}.":".$_->{'start'}.'-'.$_->{'end'}} @$flank5p_refs;
38 |   my @strs3p = map {$_->{'chrom'}.":".$_->{'start'}.'-'.$_->{'end'}} @$flank3p_refs;
39 | 
40 |   print "$callid\n";
41 |   print ">flank5p chrs=".join(',', @strs5p)."\n$seq5p\n";
42 |   print ">flank3p chrs=".join(',', @strs3p)."\n$seq3p\n";
43 |   print ">path cols=".join(',', @$cols)."\n$pathseq\n";
44 |   print "\n";
45 | }
46 | 
47 | close($fh);
48 | 


--------------------------------------------------------------------------------
/src/graph/graph_step.c:
--------------------------------------------------------------------------------
 1 | #include "global.h"
 2 | #include "graph_step.h"
 3 | #include "util.h"
 4 | 
 5 | /*
 6 |   This file contains the struct and constants used to record the behaviour of
 7 |   the GraphWalker at each "step".
 8 | */
 9 | 
10 | const char *graph_step_str[GRPHWLK_NUM_STATES] = {GRPHWLK_POPFWD_STR,
11 |                                                   GRPHWLK_COLFWD_STR,
12 |                                                   GRPHWLK_POPFRK_COLFWD_STR,
13 |                                                   GRPHWLK_NOCOVG_STR,
14 |                                                   GRPHWLK_NOCOLCOVG_STR,
15 |                                                   GRPHWLK_NOLINKS_STR,
16 |                                                   GRPHWLK_SPLIT_LINKS_STR,
17 |                                                   GRPHWLK_MISSING_LINKS_STR,
18 |                                                   GRPHWLK_USELINKS_STR};
19 | 
20 | char* graph_step_status2str(enum GraphStepStatus status, char *str, size_t len)
21 | {
22 |   (void)len; ctx_assert(len >= 20); // len is read only by this assert
23 |   ctx_assert(status < GRPHWLK_NUM_STATES); // status indexes graph_step_str[]
24 |   // strcpy() returns its destination, so this copies the name and returns str
25 |   return strcpy(str, graph_step_str[status]);
26 | }
27 | 
28 | void graph_step_print_state_hist(const size_t hist[GRPHWLK_NUM_STATES])
29 | { // Passes the state names and hist[] to util_print_nums(); output format is defined there
30 |   util_print_nums(graph_step_str, hist, GRPHWLK_NUM_STATES, 30);
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/bubbles/bubbles2/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | #
 4 | # Test we call all bubbles when filtering ref bubbles
 5 | #
 6 | 
 7 | 
 8 | CTXDIR=../../..
 9 | MCCORTEX31=$(CTXDIR)/bin/mccortex31
10 | VCFNALLELES=$(CTXDIR)/libs/biogrok/vcf-count-alleles
11 | CTXFLANKS=$(CTXDIR)/scripts/cortex_print_flanks.sh
12 | VCFSORT=$(CTXDIR)/libs/biogrok/vcf-sort
13 | BWA=$(CTXDIR)/libs/bwa/bwa
14 | 
15 | K=31
16 | 
17 | SAMPLES=itchy scratchy ref
18 | FASTAS=$(SAMPLES:=.fa)
19 | GRAPHS=$(SAMPLES:=.k$(K).ctx)
20 | 
21 | all: bubbles.txt test
22 | 
23 | itchy.fa: scratchy.fa
24 | scratchy.fa: ref.fa
25 | ref.fa:
26 | 	python make-exhaustive-alleles.py itchy.fa scratchy.fa ref.fa
27 | 
28 | %.k$(K).ctx: %.fa
29 | 	$(MCCORTEX31) build -k $(K) --sample "$*" --seq $< $@ >& $@.log
30 | 
31 | bubbles.txt.log: bubbles.txt
32 | bubbles.txt: $(GRAPHS)
33 | 	$(MCCORTEX31) bubbles -o $@.gz --haploid 2 $(GRAPHS) >& $@.log
34 | 	gzip -fd $@.gz
35 | 
36 | # expect 60 bubbles called = (2**4-1)*2*2
37 | # expect 64 haploid bubbles = (2**4)*2*2 (i.e. bubbles in ref ignored)
38 | test: bubbles.txt.log
39 | 	grep -q 'Bubble Caller called 60 bubbles' $<
40 | 	grep -q 'Haploid bubbles dropped: 64' $<
41 | 	grep -q 'Serial bubbles dropped: 0' $<
42 | 	@echo "=> all looks good."
43 | 
44 | clean:
45 | 	rm -rf $(GRAPHS) bubbles.txt *.log
46 | 
47 | .PHONY: all clean test
48 | 


--------------------------------------------------------------------------------
/src/global/ctx_assert.c:
--------------------------------------------------------------------------------
 1 | #include "global.h"
 2 | #include "ctx_assert.h"
 3 | 
 4 | //
 5 | // Checks and asserts
 6 | //
 7 | 
 8 | static void ctx_assertf2(const char *file, const char *func, int line,
 9 |                          const char *asserttxt, const char *fmt, va_list argptr)
10 | { // Writes one assert-failure report to stderr; does not abort itself
11 |   pthread_mutex_lock(&ctx_biglock); // hold ctx_biglock for the whole report
12 |   fflush(stdout); // flush pending stdout before writing the stderr report
13 |   fprintf(stderr, "[%s:%i] Assert Failed %s(): %s", file, line, func, asserttxt);
14 | 
15 |   if(fmt != NULL) { // optional caller-supplied detail, printed after ": "
16 |     fputs(": ", stderr);
17 |     vfprintf(stderr, fmt, argptr);
18 |   }
19 | 
20 |   // Print a timestamp so we know when the crash occurred
21 |   fprintf(stderr, "\n");
22 |   timestampf(stderr);
23 |   fputs(" Assert Error\n", stderr);
24 |   fflush(stderr);
25 |   pthread_mutex_unlock(&ctx_biglock);
26 | }
27 | 
28 | void ctx_assertf_no_abort(const char *file, const char *func, int line,
29 |                           const char *asserttxt, const char *fmt, ...)
30 | { // Reports a failed assert via ctx_assertf2() and then returns to the caller
31 |   va_list argptr;
32 |   va_start(argptr, fmt); // fmt is the last named parameter
33 |   ctx_assertf2(file, func, line, asserttxt, fmt, argptr);
34 |   va_end(argptr);
35 | }
36 | 
37 | void ctx_assertf(const char *file, const char *func, int line,
38 |                  const char *asserttxt, const char *fmt, ...)
39 | { // Reports a failed assert via ctx_assertf2(), then aborts the process
40 |   va_list argptr;
41 |   va_start(argptr, fmt); // fmt is the last named parameter
42 |   ctx_assertf2(file, func, line, asserttxt, fmt, argptr);
43 |   va_end(argptr);
44 |   abort(); // terminate after the report has been written
45 | }
46 | 


--------------------------------------------------------------------------------
/scripts/analysis/mapping-vars-test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | 
 5 | if [ $# -ne 4 ]; then  # need exactly: input VCF, ref, truth FASTA, output prefix
 6 |   echo "Usage: $0 <in.vcf.gz> <ref.fa> <truth.fa> <out-prefix>" 1>&2
 7 |   echo "  writes: OUT.fa, OUT.sam, OUT.stats.txt, OUT.sites.txt, OUT.renamed.vcf.gz" 1>&2
 8 |   echo "  sites that map + pass are in: OUT.vcf.gz" 1>&2
 9 |   exit -1
10 | fi
11 | set -o xtrace
12 | 
13 | CTXDIR=$( cd $( dirname ${BASH_SOURCE[0]} ) && cd ../.. && pwd )
14 | BWA=$CTXDIR/libs/bwa/bwa
15 | BGZIP=$CTXDIR/libs/htslib/bgzip
16 | BCFTOOLS=$CTXDIR/libs/bcftools/bcftools
17 | VCFCONTIGS=$CTXDIR/libs/vcf-slim/bin/vcfcontigs
18 | SAM2VCF=$CTXDIR/libs/vcf-slim/scripts/sam-name-to-vcf.sh
19 | VCFRENAME=$CTXDIR/libs/biogrok/vcf-rename
20 | VCF_SELECT_ID=$CTXDIR/libs/biogrok/vcf-select-id
21 | SAMCMP=$CTXDIR/scripts/analysis/haploid-sam-compare.py
22 | 
23 | 
24 | INVCF=$1
25 | REF=$2
26 | TRUTHFA=$3
27 | PREFIX=$4
28 | 
29 | OUTFASTA=$PREFIX.fa
30 | OUTSAM=$PREFIX.sam
31 | OUTSTATS=$PREFIX.stats.txt
32 | OUTSITES=$PREFIX.sites.txt
33 | RENAMEDVCF=$PREFIX.renamed.vcf.gz
34 | OUTVCF=$PREFIX.vcf.gz
35 | 
36 | mkdir -p $(dirname $OUTFASTA)
37 | 
38 | $VCFRENAME $INVCF > $RENAMEDVCF
39 | $VCFCONTIGS --trim --no-ref 50 $REF $RENAMEDVCF > $OUTFASTA
40 | $BWA mem $TRUTHFA $OUTFASTA > $OUTSAM
41 | $SAMCMP --print-valid $OUTSITES $OUTSAM > $OUTSTATS
42 | $VCF_SELECT_ID <(cut -d: -f3 $OUTSITES) $RENAMEDVCF | $BGZIP -c > $OUTVCF
43 | 


--------------------------------------------------------------------------------
/src/basic/range.h:
--------------------------------------------------------------------------------
 1 | #ifndef RANGE_H_
 2 | #define RANGE_H_
 3 | 
 4 | /*
 5 |  * Valid ranges are:
 6 |  *   *
 7 |  *   1
 8 |  *   3
 9 |  *   2-4
10 |  *   1,1-3,2
11 |  */
12 | 
13 | /**
14 |  * Parse range string and return number of items
15 |  *
16 |  * @return number of items in range, or -1 if there is a syntax error
17 |  */
18 | int range_get_num(const char *str, size_t range_max);
19 | 
20 | /**
21 |  * Parse range string into array arr
22 |  *
23 |  * @param str nul terminated string to parse
24 |  * @param arr place parsed array here
25 |  * @param range_max max value permitted in the array
26 |  * @return 0 on success, -1 on error
27 |  */
28 | int range_parse_array(const char *str, size_t *arr, size_t range_max);
29 | 
30 | /**
31 |  * Parse range into array arr, filling array to ensure exactly a given number
32 |  * of entries. If empty, array is filled 0..num_entries-1, if only one entry,
33 |  * array is filled with same entry num_entries times
34 |  *
35 |  * @param str nul terminated string to parse
36 |  * @param arr place parsed array here
37 |  * @param range_max max value permitted in the array
38 |  * @param num_entries Force exactly `num_entries` to be placed in `arr`
39 |  * @return 0 on success, -1 on error
40 |  */
41 | int range_parse_array_fill(const char *str, size_t *arr,
42 |                            size_t range_max, size_t num_entries);
43 | 
44 | #endif /* RANGE_H_ */
45 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/large_events/large-events-plot.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript --vanilla
 2 | 
 3 | #
 4 | # Dot plot of ref allele length vs sample allele length
 5 | #
 6 | 
 7 | args <- commandArgs(trailingOnly=TRUE)
 8 | if(length(args) != 1) {
 9 |   stop("Usage: Rscript --vanilla large-events-plot.R <stats.txt>\n")
10 | }
11 | 
12 | file <- args[1]
13 | #file <- 'bubbles50K/stats.txt'
14 | 
15 | title <- expression(paste(italic('klebsiella pneumoniae'),' large event sizes'))
16 | xlabel <- 'Reference length (kbp)'
17 | ylabel <- 'Sample length (kbp)'
18 | 
19 | r <- read.table(file,sep='\t',head=F,comment.char='#')
20 | r <- r / 1000
21 | 
22 | # Get maximum and round up to a multiple of 5
23 | lim <- max(r[,1],r[,2])
24 | lim <- floor((ceiling(lim)+4)/5)*5
25 | 
26 | pdf(file='kleb_large_events_R_log.pdf', width=6, height=6)
27 | plot(r, xlab=xlabel, ylab=ylabel, log="xy",
28 |      main=title, xlim=c(0,lim), ylim=c(0,lim))
29 | dev.off()
30 | 
31 | # With ggplot
32 | library('ggplot2')
33 | library('reshape')
34 | library('scales')
35 | library('plyr')
36 | 
37 | df <- data.frame(ref=r[,1], sample=r[,2])
38 | 
39 | p <- ggplot(df, aes(x=ref, y=sample)) +
40 |      geom_point(shape=1) +
41 |      scale_x_log10(limits=c(NA,lim)) + # limits go inside the log scale: a later xlim()/ylim()
42 |      scale_y_log10(limits=c(NA,lim)) + # would replace it with a linear one; NA = data minimum
43 |      ggtitle(title) + xlab(xlabel) + ylab(ylabel)
44 | 
45 | ggsave(p, file='kleb_large_events_ggplot_log.pdf', width=6, height=6)
46 | 


--------------------------------------------------------------------------------
/tests/path_check/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | CTXDIR=../..
 4 | MCCORTEX=$(CTXDIR)/bin/mccortex31
 5 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
 6 | K=7
 7 | KEEP=$(shell echo genome.fa genome.k$(K).{ctx,ctp} reads.1.fa.gz reads.2.fa.gz reads.{se,pe}.k$(K).ctp)
 8 | PLOTS=genome.k$(K).dot genome.k$(K).pdf
 9 | 
10 | all: $(KEEP)
11 | 
12 | plots: $(PLOTS)
13 | 
14 | clean:
15 | 	rm -rf $(KEEP) $(PLOTS)
16 | 
17 | # Sample random genome
18 | genome.fa:
19 | 	$(DNACAT) -F -n 200 > genome.fa
20 | 
21 | genome.k$(K).ctx: genome.fa
22 | 	$(MCCORTEX) build -q -m 10M -k $(K) --sample MssrGenome --seq $< $@
23 | 
24 | genome.k$(K).dot: genome.k$(K).ctx
25 | 	$(MCCORTEX) unitigs -q --dot $< > $@
26 | 
27 | genome.k$(K).pdf: genome.k$(K).dot
28 | 	dot -Tpdf $< > $@
29 | # Simulate reads from genome.fa (readsim reads it via -r), so it must be built first
30 | reads.1.fa.gz reads.2.fa.gz: genome.fa
31 | 	$(CTXDIR)/libs/readsim/readsim -r genome.fa -l 10 -i 20 -v 0.1 -d 2 reads
32 | 
33 | genome.k$(K).ctp: genome.k$(K).ctx
34 | 	$(MCCORTEX) thread -q -m 10M -t 1 --seq genome.fa -o $@ $<
35 | 
36 | reads.se.k$(K).ctp: genome.k$(K).ctx reads.1.fa.gz reads.2.fa.gz
37 | 	$(MCCORTEX) thread -q -m 10M -t 1 --seq reads.1.fa.gz --seq reads.2.fa.gz -o reads.se.k$(K).ctp genome.k$(K).ctx
38 | 
39 | reads.pe.k$(K).ctp: genome.k$(K).ctx reads.1.fa.gz reads.2.fa.gz
40 | 	$(MCCORTEX) thread -q -m 10M -t 1 --seq2 reads.1.fa.gz:reads.2.fa.gz -o reads.pe.k$(K).ctp genome.k$(K).ctx
41 | 
42 | .PHONY: all plots clean
43 | 


--------------------------------------------------------------------------------
/results/data/chr22/uniq_flanks/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | #
 3 | # Isaac Turner
 4 | # 2014-09-05
 5 | #
 6 | 
 7 | CTXDIR=../../../..
 8 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
 9 | BWA=bwa
10 | SAMTOOLS=samtools
11 | 
12 | REF=../chr22_17M_18M.fa
13 | 
14 | K0=GGTCGCACACAAATACTACGGGCATTGGATGCGACCATAAGTCTTGACAGGCTTTGTTCCCT
15 | K1=ACAACTTAACCTGGAACTAGAACTAATTTATGAGCGAGCCAGAACAGGTAGTCTGAGGGAGT
16 | 
17 | K0s=$(shell echo $(K0) | awk '{print substr($$0,1,11)}')
18 | K1s=$(shell echo $(K1) | awk '{print substr($$0,length($$0)-10,11)}')
19 | K0r=$(shell echo $(K0s) | $(DNACAT) -P -r -)
20 | K1r=$(shell echo $(K1s) | $(DNACAT) -P -r -)
21 | 
22 | all: chr22.1Mbp.uniq.fa chr22.1Mbp.uniq.fa.bwt chr22.1Mbp.uniq.fa.fai check
23 | 
24 | chr22.1Mbp.uniq.fa:
25 | 	(echo '>chr22_17M_18M.11bp.and.up.uniq.flanks'; \
26 | 	 echo $(K0); $(DNACAT) -P $(REF); echo $(K1);) | \
27 | 	  $(DNACAT) -F -w 80 - > $@
28 | 
29 | chr22.1Mbp.uniq.fa.bwt: chr22.1Mbp.uniq.fa
30 | 	$(BWA) index $<
31 | 
32 | chr22.1Mbp.uniq.fa.fai: chr22.1Mbp.uniq.fa
33 | 	$(SAMTOOLS) faidx $<
34 | 
35 | clean:
36 | 	rm -rf chr22.1Mbp.uniq.fa*
37 | 
38 | check: chr22.1Mbp.uniq.fa
39 | 	@echo; echo Command should only print two kmers:
40 | 	$(DNACAT) -P chr22.1Mbp.uniq.fa | grep -ioE '($(K0s)|$(K0r)|$(K1s)|$(K1r))' -
41 | 	@echo; echo Check file contains exactly 1Mbp+2*62:
42 | 	$(DNACAT) -s chr22.1Mbp.uniq.fa
43 | 
44 | .PHONY: all clean check
45 | 


--------------------------------------------------------------------------------
/tests/unitigs/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | #
 4 | # Test unitigs command by generating 200 random DNA bases, building cortex graph
 5 | # then generating unitigs with various output options
 6 | #
 7 | 
 8 | K=7
 9 | CTXDIR=../..
10 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
11 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl
12 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
13 | 
14 | FILES=genome.fa genome.k$(K).ctx
15 | UNITIGS=genome.k$(K).unitigs.fa genome.k$(K).unitigs.dot genome.k$(K).unitigs.gfa
16 | PLOTS=genome.k$(K).unitigs.dot genome.k$(K).kmers.dot
17 | PDFS=$(PLOTS:.dot=.pdf)
18 | 
19 | TGTS=$(FILES) $(UNITIGS) $(PLOTS)
20 | 
21 | all: $(TGTS)
22 | 
23 | clean:
24 | 	rm -rf $(TGTS) $(PDFS)
25 | 
26 | # Sample random genome
27 | genome.fa:
28 | 	$(DNACAT) -F -n 200 > genome.fa
29 | 
30 | genome.k$(K).ctx: genome.fa
31 | 	$(MCCORTEX) build -q -m 1M -k $(K) --sample MssrGenome --seq $< $@
32 | 
33 | genome.k$(K).unitigs.fa: genome.k$(K).ctx
34 | 	$(MCCORTEX) unitigs -q -m 1M -o $@ $<
35 | 
36 | genome.k$(K).unitigs.dot: genome.k$(K).ctx
37 | 	$(MCCORTEX) unitigs -q -m 1M --dot --points $< > $@
38 | 
39 | genome.k$(K).unitigs.gfa: genome.k$(K).ctx
40 | 	$(MCCORTEX) unitigs -q -m 1M --gfa $< > $@
41 | 
42 | genome.k$(K).kmers.dot: genome.k$(K).ctx
43 | 	$(CTX2DOT) $< > $@
44 | 
45 | %.pdf: %.dot
46 | 	dot -Tpdf $< > $@
47 | 
48 | plots: $(PDFS)
49 | 
50 | .PHONY: all clean plots
51 | 


--------------------------------------------------------------------------------
/src/basic/chrom_pos_list.h:
--------------------------------------------------------------------------------
 1 | #ifndef CHROM_POS_LIST_H_
 2 | #define CHROM_POS_LIST_H_
 3 | 
 4 | // ChromPosOffset coords are read/printed 1-based, stored 0-based
 5 | typedef struct
 6 | {
 7 |   char *chrom;
 8 |   size_t start, end, offset; // 0-based; start < end; end not inclusive
 9 |   bool fw_strand;
10 | } ChromPosOffset;
11 | 
12 | #include "madcrowlib/madcrow_buffer.h"
13 | madcrow_buffer(chrompos_buf, ChromPosBuffer, ChromPosOffset);
14 | 
15 | // Sort by length, chrom, strand (fw,rv), start
16 | int chrom_pos_cmp_len(const void *aa, const void *bb);
17 | 
18 | // Validate a chrom position object
19 | void chrom_pos_validate(const ChromPosOffset *pos);
20 | #define chrom_pos_len(pos) ((pos)->end - (pos)->start)
21 | 
22 | /**
23 |  * Get largest match
24 |  * @param buf        List of chromosome positions to search
25 |  * @param pos        Copy largest to here
26 |  * @param use_first  If several share the largest size: return first if true, otherwise last
27 |  * @return           Number of largest
28 |  */
29 | size_t chrom_pos_list_get_largest(const ChromPosBuffer *buf, bool use_first,
30 |                                   ChromPosOffset *pos);
31 | 
32 | // Parse a string in the form: chr:start-end:strand:offset[,...]
33 | // Return 0 on success, -1 on error
34 | int chrom_pos_list_parse(char *str, ChromPosBuffer *buf);
35 | 
36 | void chrom_pos_list_sort(ChromPosBuffer *buf);
37 | 
38 | #endif /* CHROM_POS_LIST_H_ */
39 | 


--------------------------------------------------------------------------------
/tests/pop_bubbles/pop_bubbles2/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | # Test pop bubbles with multiple samples, multiple input files
 4 | 
 5 | K=21
 6 | CTXDIR=../../..
 7 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$[(($(K)+31)/32)*32 - 1])
 8 | 
 9 | SEQS=sample1.fa sample2.fa clean2.fa
10 | GRAPHS=sample1.ctx sample2.ctx popped.ctx truth.ctx
11 | 
12 | all: popped.ctx truth.ctx check
13 | 
14 | sample1.fa:
15 | 	( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; \
16 | 		echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAGATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@
17 | 
18 | sample2.fa:
19 | 	( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGAcATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@
20 | 
21 | clean2.fa:
22 | 	( echo CCTAGGGTGCAGTCAATTGCCAACGGTCGGGA; \
23 | 		echo ATAACTTCTCCAAACCAGGTTCATGACAGCCAACCAA; ) > $@
24 | 
25 | truth.ctx: sample1.ctx sample2.ctx clean2.fa
26 | 	$(MCCORTEX) build -q -k $(K) -s clean2 -1 clean2.fa - | \
27 | 	$(MCCORTEX) join -q -o $@ 0:sample1.ctx 1:-
28 | 
29 | %.ctx: %.fa
30 | 	$(MCCORTEX) build -q -k $(K) --sample $* --seq $< $@
31 | # Pop bubbles across both sample graphs; --out must be directly followed by the output path
32 | popped.ctx: sample1.ctx sample2.ctx
33 | 	$(MCCORTEX) popbubbles -q --out $@ $^
34 | 
35 | check: popped.ctx truth.ctx
36 | 	diff -q <($(MCCORTEX) view -q -k popped.ctx | sort) <($(MCCORTEX) view -q -k truth.ctx | sort) && \
37 | 	echo "Kmers match."
38 | 
39 | 
40 | clean:
41 | 	rm -rf $(SEQS) $(GRAPHS)
42 | 
43 | .PHONY: all clean check
44 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls5/truth.cov.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20151014
 4 | ##reference=ref/ref.fa
 5 | ##contig=<ID=ref,length=200>
 6 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 7 | ##SAMPLE=<ID=wally,K11_kcov=3,K11_nkmers=205,mean_read_length=190>
 8 | ##FORMAT=<ID=K11R,Number=A,Type=Integer,Description="Coverage on ref (k=11): sum(kmer_covs) / exp_num_kmers">
 9 | ##FORMAT=<ID=K11A,Number=A,Type=Integer,Description="Coverage on alt (k=11): sum(kmer_covs) / exp_num_kmers">
10 | ##mccortex_9e268b2=<prev="NULL",cmd="../../../bin/mccortex31 vcfcov -m 10M -o calls.cov.vcf -r ../ref/ref.fa --max-nvars 5 -f --low-mem calls.vcf wally.k11.ctx",cwd="/Users/isaac/mccortex/tests/vcfcov/calls5",datetime="20151103-23:49:27",version=v0.0.3-386-g16563e1-dirty>
11 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	wally
12 | ref	7	.	G	T	.	PASS	.	K11R:K11A	4:0
13 | ref	15	.	GCAATACCCCCCGATGACGG	G	.	PASS	.	K11R:K11A	2:2
14 | ref	20	.	A	T	.	PASS	.	K11R:K11A	2:0
15 | ref	22	.	C	G	.	PASS	.	K11R:K11A	2:0
16 | ref	40	.	T	A	.	PASS	.	K11R:K11A	2:2
17 | ref	49	.	GG	CGAT	.	PASS	.	K11R:K11A	4:0
18 | ref	71	.	A	GAT	.	PASS	.	K11R:K11A	4:0
19 | ref	92	.	GCACCAGGG	AC	.	PASS	.	K11R:K11A	4:0
20 | ref	121	.	C	G	.	PASS	.	K11R:K11A	4:0
21 | ref	142	.	GGCACAGCA	TTACTCTTC	.	PASS	.	K11R:K11A	4:0
22 | ref	171	.	A	CTAG	.	PASS	.	K11R:K11A	4:0
23 | ref	192	.	CATCATAG	A	.	PASS	.	K11R:K11A	4:0
24 | 


--------------------------------------------------------------------------------
/src/alignment/correct_aln_input.h:
--------------------------------------------------------------------------------
 1 | #ifndef CORRECT_ALN_INPUT_H_
 2 | #define CORRECT_ALN_INPUT_H_
 3 | 
 4 | #include "seqout.h"
 5 | #include "cortex_types.h"
 6 | #include "correct_alignment.h"
 7 | #include "async_read_io.h"
 8 | 
 9 | #include "cJSON/cJSON.h"
10 | 
11 | typedef struct
12 | {
13 |   AsyncIOInput files;
14 |   uint8_t fq_cutoff, hp_cutoff;
15 |   ReadMateDir matedir;
16 |   CorrectAlnParam crt_params;
17 |   // Next two only set if outputting sequences per file, as in ctx_correct.c
18 |   char *out_base;
19 |   SeqOutput *output;
20 | } CorrectAlnInput;
21 | 
22 | #define CORRECT_ALN_INPUT_INIT {.fq_cutoff = 0, .hp_cutoff = 0,       \
23 |                                 .matedir = READPAIR_FR,               \
24 |                                 .crt_params = CORRECT_PARAMS_DEFAULT, \
25 |                                 .out_base = NULL, .output = NULL}
26 | 
27 | #include "madcrowlib/madcrow_buffer.h"
28 | madcrow_buffer(correct_aln_input_buf, CorrectAlnInputBuffer, CorrectAlnInput);
29 | 
30 | cJSON* correct_aln_input_json_hdr(const CorrectAlnInput *input);
31 | 
32 | void correct_aln_input_print(const CorrectAlnInput *c);
33 | 
34 | // Copy CorrectAlnInput to an array of AsyncIOInputs
35 | void correct_aln_input_to_asycio(AsyncIOInput *asyncio_tasks,
36 |                                  CorrectAlnInput *inputs,
37 |                                  size_t num_inputs);
38 | 
39 | #endif /* CORRECT_ALN_INPUT_H_ */
40 | 


--------------------------------------------------------------------------------
/scripts/R/plot-link-dist-cov.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript --vanilla
 2 | 
 3 | # Plot coverage matrix generated by e.g. 'mccortex31 links --covg-hist out.csv ...'
 4 | #
 5 | args <- commandArgs(trailingOnly=TRUE)
 6 | if(length(args) < 2 || length(args) > 5) {
 7 |   stop("Usage: ./plot-link-dist-cov.R <covg.csv> <out.pdf> [dist [cutoff [maxcov]]]\n")
 8 | }
 9 | 
10 | dist=1
11 | cutoff=0
12 | maxcov=0
13 | 
14 | input_csv <- args[1]
15 | output_pdf <- args[2]
16 | 
17 | if(length(args) >= 3) { dist <- as.numeric(args[3]) }
18 | if(length(args) >= 4) { cutoff <- as.numeric(args[4]) }
19 | if(length(args) >= 5) { maxcov <- as.numeric(args[5]) }
20 | 
21 | library('ggplot2')
22 | library('reshape')
23 | library('scales')
24 | library('plyr')
25 | 
26 | cat("input_csv='",input_csv,"'\n",sep='')
27 | cat("output_pdf='",output_pdf,"'\n",sep='')
28 | cat('dist=',dist,'\n',sep='')
29 | cat('maxcov=',maxcov,'\n',sep='')
30 | 
31 | r <- read.table(input_csv,sep=',',head=T,row.names=1,comment.char='#',as.is=T)
32 | 
33 | if(maxcov == 0) { maxcov=ncol(r) }
34 | maxcov<-min(ncol(r), maxcov)
35 | r <- r[,1:maxcov]
36 | 
37 | v<-as.numeric(r[dist,])
38 | d<-data.frame(x=1:maxcov, y=v)
39 | 
40 | p <- ggplot(d, aes(x=x, y=y)) + geom_line() +
41 |      xlab("Link coverage") +
42 |      ylab(paste("Number of links of length",(dist+1),"(kmers)")) +
43 |      geom_vline(xintercept=cutoff, color="red")
44 | 
45 | 
46 | ggsave(p, file=output_pdf, width=6, height=6)
47 | 


--------------------------------------------------------------------------------
/results/kmer_size_experiment/results/plot-link-counts.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript --vanilla
 2 | 
 3 | # Isaac Turner 2017-02-16
 4 | 
 5 | args <- commandArgs(trailingOnly=TRUE)
 6 | if(length(args) != 2) {
 7 |   stop("Usage: ./plot-link-counts.R <linkcounts.pdf> <linkcounts.csv>\n")
 8 | }
 9 | 
10 | plot_path <- "latest/perfect.linkcounts.se.pdf"
11 | csv_path <- "latest/perfect.linkcounts.se.csv"
12 | 
13 | plot_path = args[1]
14 | csv_path = args[2]
15 | 
16 | a <- read.table(csv_path, sep='\t',head=T,comment.char='#',as.is=T)
17 | 
18 | # Plotting parameters
19 | cols <- c('#1b9e77', '#d95f02', '#7570b3', 'red') # from color brewer
20 | pnts <- c(19,4,17,1) # point styles pch=
21 | jf <- 0.2 # jitter factor
22 | lt <- 2.5 # line thickness
23 | #
24 | 
25 | # * joins with no spaces, ~ joins with a space
26 | xlabel = expression(italic('k'))
27 | ylabel = expression('no. of '*italic('k')*'mers with links (log)')
28 | 
29 | # pdf(plot_path, width=6, height=6)
30 | quartz(type='pdf',file=plot_path,width=6,height=5)
31 | 
32 | # Remove empty title space
33 | par(mar=c(4,5,2,2)+0.1) # set margins: bottom, left, top and right
34 | par(xpd=TRUE)
35 | 
36 | par(mgp=c(4, 1, 0)) # axis label positions
37 | 
38 | plot(a$K, a$n_link_kmers, type='b', axes=F, log='y',
39 |      xlab='', ylab='', ylim=c(1,max(a$n_link_kmers)))
40 | 
41 | mtext(side=1, text=xlabel, line=2)
42 | mtext(side=2, text=ylabel, line=4)
43 | axis(1, at=a$K)
44 | axis(2, las=2)
45 | 
46 | dev.off()
47 | 


--------------------------------------------------------------------------------
/tests/breakpoint/breakpoint2/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Check that we don't call a ref bubble
 3 | #
 4 | 
 5 | SHELL:=/bin/bash -euo pipefail
 6 | 
 7 | CTXDIR=../../..
 8 | CTXPIPELINE=$(CTXDIR)/scripts/make-pipeline.pl
 9 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
10 | READSIM=$(CTXDIR)/libs/readsim/readsim
11 | VCFCOUNT=$(CTXDIR)/libs/biogrok/vcf-count
12 | 
13 | REFLEN=1000
14 | K=31
15 | SEQDEPTH=30
16 | READLEN=50
17 | OUTDIR=proj
18 | 
19 | all: run
20 | 
21 | ref.fa:
22 | 	echo '>ref' > $@
23 | 	echo -n TCTCATATGGGCATTGTCGTCTGCCCGTCACCTTCGGTCGACGCTGTTCAACATTCGGTGTTGTAGTTTATTATACTAGCGCAATCCCCGAGTTTGGGCA >> $@
24 | 	echo    TCTCATATGGGCATTGTCGTCTGCCCGTCACCTTCGGTCGAgGCTGTTCAACATTCGGTGTTGTAGTTTATTATACTAGCGCAATCCCCGAGTTTGGGCA >> $@
25 | 
26 | reads/reads.fa.gz: ref.fa
27 | 	mkdir -p reads
28 | 	$(READSIM) -r ref.fa -l $(READLEN) -s -d $(SEQDEPTH) reads/reads
29 | 
30 | task.k$(K).mk:
31 | 	echo "RefReads reads/reads.fa.gz" | $(CTXPIPELINE) -r ref.fa $(K) proj - > $@
32 | 
33 | run: task.k$(K).mk reads/reads.fa.gz ref.fa
34 | 	$(MAKE) -f $< CTXDIR=$(CTXDIR) breakpoints-vcf
35 | 	@# Check no VCF entries
36 | 	(( `$(VCFCOUNT) proj/vcfs/breakpoints.joint.links.k$(K).vcf.gz` == 0 )) || false
37 | 	@# Check no breakpoint call entries (decompress first: grep cannot match inside gzip data)
38 | 	(( `gzip -fcd proj/k$(K)/breakpoints_links/joint.brk.gz | grep -c '>brkpnt'` == 0 )) || false
39 | 	@echo 'Success: no breakpoint calls or VCF entries!'
40 | 
41 | clean:
42 | 	rm -rf ref.fa* reads proj task.k$(K).mk
43 | 
44 | .PHONY: all run clean
45 | 


--------------------------------------------------------------------------------
/src/graph/contig_confidence.h:
--------------------------------------------------------------------------------
 1 | #ifndef CONTIG_CONFIDENCE_H_
 2 | #define CONTIG_CONFIDENCE_H_
 3 | 
 4 | #include "madcrowlib/madcrow_buffer.h"
 5 | madcrow_buffer(double_buf,DoubleBuffer,double);
 6 | 
 7 | typedef struct {
 8 |   DoubleBuffer table;
 9 |   size_t ncols;
10 | } ContigConfidenceTable;
11 | 
12 | // Call conf_table_dealloc to release memory after calling this function
13 | // void conf_table_load_csv(ContigConfidenceTable *conf_table,
14 | //                          FILE *fh, const char *path);
15 | 
16 | // Call conf_table_dealloc to release memory after calling this function
17 | void conf_table_update_hist(ContigConfidenceTable *table,
18 |                             size_t col, size_t genome_size,
19 |                             size_t *contig_hist, size_t hist_len);
20 | 
21 | // Call conf_table_dealloc to release memory after calling this function
22 | void conf_table_calc(ContigConfidenceTable *table, size_t col,
23 |                      size_t max_read_len, double avg_bp_covg);
24 | 
25 | void conf_table_alloc(ContigConfidenceTable *table, size_t ncols);
26 | void conf_table_dealloc(ContigConfidenceTable *table);
27 | 
28 | double conf_table_lookup(const ContigConfidenceTable *table,
29 |                          size_t col, size_t dist);
30 | 
31 | void conf_table_print(const ContigConfidenceTable *table, FILE *fh);
32 | 
33 | void conf_table_save(const ContigConfidenceTable *table, const char *path);
34 | 
35 | #endif /* CONTIG_CONFIDENCE_H_ */
36 | 


--------------------------------------------------------------------------------
/src/basic/graph_info.h:
--------------------------------------------------------------------------------
 1 | #ifndef GRAPH_INFO_H_
 2 | #define GRAPH_INFO_H_
 3 | 
 4 | #include <inttypes.h>
 5 | #include "string_buffer/string_buffer.h"
 6 | #include "cortex_types.h"
 7 | #include "seq_loading_stats.h"
 8 | 
 9 | // Thresholds are zero if not used (e.g. cleaned_unitigs == false)
10 | // is_graph_intersection is for cleaning a low covg sample against
11 | // cleaned pool of population
12 | typedef struct
13 | {
14 |   bool cleaned_tips, cleaned_unitigs, cleaned_kmers;
15 |   Covg clean_unitigs_thresh, clean_kmers_thresh;
16 |   bool is_graph_intersection;
17 |   StrBuf intersection_name;
18 | } ErrorCleaning;
19 | 
20 | typedef struct
21 | {
22 |   uint32_t mean_read_length; // after trim = (total_seq / number of contigs)
23 |   uint64_t total_sequence;
24 |   StrBuf sample_name;
25 |   long double seq_err;
26 |   ErrorCleaning cleaning;
27 | } GraphInfo;
28 | 
29 | void graph_info_init(GraphInfo *ginfo);
30 | void graph_info_alloc(GraphInfo *ginfo);
31 | void graph_info_dealloc(GraphInfo *ginfo);
32 | 
33 | void graph_info_make_intersect(const GraphInfo *ginfo, StrBuf *intersect_name);
34 | void graph_info_append_intersect(ErrorCleaning *cleaning,
35 |                                  const char *intersect_name);
36 | 
37 | void graph_info_cpy(GraphInfo *dst, const GraphInfo *src);
38 | void graph_info_merge(GraphInfo *dst, const GraphInfo *src);
39 | 
40 | void graph_info_update_stats(GraphInfo *ginfo, const SeqLoadingStats *stats);
41 | 
42 | #endif /* GRAPH_INFO_H_ */
43 | 


--------------------------------------------------------------------------------
/tests/threading/threading3/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | K=9
 4 | CTXDIR=../../..
 5 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
 6 | CTX2DOT=$(CTXDIR)/scripts/perl/mccortex-graph-to-graphviz.pl
 7 | 
 8 | GRAPHS=genome.k9.ctx
 9 | LINKS=reads.pe.one.ctp reads.pe.two.ctp
10 | LOGS=$(addsuffix .log,$(GRAPHS) $(LINKS))
11 | TGTS=genome.fa read.1.fa read.2.fa $(GRAPHS) $(LINKS)
12 | # non-default target: genome.k9.pdf
13 | 
14 | all: $(TGTS)
15 | 
16 | clean:
17 | 	rm -rf $(TGTS) $(LOGS) gap_sizes.*.csv mp_sizes.*.csv genome.k$(K).pdf
18 | 
19 | plots: genome.k$(K).pdf
20 | 
21 | genome.fa:
22 | 	echo gCATCAGTGGCCttggcgactcgc > genome.fa
23 | 	echo TCATCAGTGGCCATGACGCTAACT >> genome.fa
24 | 
25 | read.1.fa:
26 | 	echo TCATCAGTGG > read.1.fa
27 | 
28 | read.2.fa:
29 | 	# echo ACGCTAACT > read.2.fa # Actually revcmp read2
30 | 	echo AGTTAGCGT > read.2.fa
31 | 
32 | genome.k$(K).ctx: genome.fa
33 | 	$(MCCORTEX) build -m 1M -k $(K) --sample Genome --seq genome.fa genome.k$(K).ctx >& $@.log
34 | 
35 | reads.pe.one.ctp: genome.k$(K).ctx read.1.fa read.2.fa
36 | 	$(MCCORTEX) thread -m 1M --print-contigs --one-way --seq2 read.1.fa:read.2.fa -o $@ genome.k$(K).ctx >& $@.log
37 | 
38 | reads.pe.two.ctp: genome.k$(K).ctx read.1.fa read.2.fa
39 | 	$(MCCORTEX) thread -m 1M --print-contigs --two-way --seq2 read.1.fa:read.2.fa -o $@ genome.k$(K).ctx >& $@.log
40 | # Non-default target (see `plots`): the graph file must be built before it can be rendered
41 | genome.k$(K).pdf: genome.k$(K).ctx
42 | 	$(CTX2DOT) genome.k$(K).ctx | dot -Tpdf > genome.k$(K).pdf
43 | 
44 | .PHONY: all clean plots
45 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls1/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | #
 4 | # Test vcfcov with three groups of overlapping SNPs at positions ref:1,50,199
 5 | # and chr1:30. Length of chromosome is ref=200, chr1=100.
 6 | # We also test that we don't crash if we encounter a contig that was not defined
 7 | # in the header.
 8 | #
 9 | 
10 | K=21
11 | CTXDIR=../../..
12 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
13 | VCFENTRIES=$(CTXDIR)/libs/biogrok/vcf-entries
14 | 
15 | REF=../ref/ref.fa
16 | 
17 | all: check
18 | 
19 | clean:
20 | 	rm -rf calls.cov.vcf lowmem.cov.vcf graph.k$(K).ctx *.log
21 | 
22 | calls.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx
23 | 	$(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --high-mem calls.vcf graph.k$(K).ctx >& $@.log
24 | 
25 | lowmem.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx
26 | 	$(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --low-mem calls.vcf graph.k$(K).ctx >& $@.log
27 | 
28 | graph.k$(K).ctx: john.fa jane.fa
29 | 	$(MCCORTEX) build -m 10M -k $(K) \
30 | 	  --sample John --seq john.fa \
31 | 	  --sample Jane --seq jane.fa \
32 | 	  --sample Empty --seq <(echo '') \
33 | 	  $@ >& $@.log
34 | 
35 | check: calls.cov.vcf lowmem.cov.vcf truth.cov.vcf
36 | 	diff -q <($(VCFENTRIES) calls.cov.vcf) <($(VCFENTRIES) truth.cov.vcf)
37 | 	diff -q <($(VCFENTRIES) lowmem.cov.vcf) <($(VCFENTRIES) truth.cov.vcf)
38 | 	@echo "=> VCF files match."
39 | 
40 | view: calls.cov.vcf truth.cov.vcf
41 | 	gzip -fcd calls.cov.vcf
42 | 	gzip -fcd truth.cov.vcf
43 | 
44 | .PHONY: all clean view check
45 | 


--------------------------------------------------------------------------------
/results/var_calling_10ecoli/results/20150615_joint_1by1_links_plain/20150617.wed.stats.txt:
--------------------------------------------------------------------------------
 1 | Missed: 2415 / 45789 ( 5.27%)
 2 | FP:      384 / 43758 ( 0.88%)
 3 | Found:  43374 / 45789 (94.73%)
 4 | remember to delete temp dir: isec_1by1_cortex
 5 | Missed: 2232 / 45789 ( 4.87%)
 6 | FP:      391 / 43948 ( 0.89%)
 7 | Found:  43557 / 45789 (95.13%)
 8 | remember to delete temp dir: isec_brk_1by1_plain
 9 | Missed: 1977 / 45789 ( 4.32%)
10 | FP:      392 / 44204 ( 0.89%)
11 | Found:  43812 / 45789 (95.68%)
12 | remember to delete temp dir: isec_brk_1by1_links
13 | Missed: 2074 / 45789 ( 4.53%)
14 | FP:      233 / 43948 ( 0.53%)
15 | Found:  43715 / 45789 (95.47%)
16 | remember to delete temp dir: isec_brk_joint_plain
17 | Missed: 2009 / 45789 ( 4.39%)
18 | FP:      233 / 44013 ( 0.53%)
19 | Found:  43780 / 45789 (95.61%)
20 | remember to delete temp dir: isec_brk_joint_links
21 | Missed: 1716 / 45789 ( 3.75%)
22 | FP:     3327 / 47400 ( 7.02%)
23 | Found:  44073 / 45789 (96.25%)
24 | remember to delete temp dir: isec_bub_1by1_plain
25 | Missed: 1672 / 45789 ( 3.65%)
26 | FP:     4068 / 48185 ( 8.44%)
27 | Found:  44117 / 45789 (96.35%)
28 | remember to delete temp dir: isec_bub_1by1_links
29 | Missed: 3746 / 45789 ( 8.18%)
30 | FP:     3180 / 45223 ( 7.03%)
31 | Found:  42043 / 45789 (91.82%)
32 | remember to delete temp dir: isec_bub_joint_plain
33 | Missed: 3688 / 45789 ( 8.05%)
34 | FP:     3363 / 45464 ( 7.40%)
35 | Found:  42101 / 45789 (91.95%)
36 | remember to delete temp dir: isec_bub_joint_links
37 | 


--------------------------------------------------------------------------------
/scripts/seq2pdf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Exit immediately if a command exits with a non-zero status.
 4 | set -euo pipefail
 5 | set +o posix
 6 | 
 7 | cmd=$0
 8 | 
 9 | function usage {
10 |   >&2 echo "usage $cmd [--simplify|--dot] <kmer> <file1> [...]"
11 |   >&2 echo "  prints pdf to stdout, so please remember to redirect"
12 |   >&2 echo "  e.g. $cmd 5 <(echo ACAACACGT) <(echo CCACACAA) > out.pdf"
13 |   exit -1
14 | }
15 | 
16 | script_args=
17 | mkpdf=1
18 | 
19 | while [[ $# -gt 2 ]]
20 | do
21 |   if [[ ($1 == "--simplify") ]]
22 |   then
23 |     script_args=$1
24 |     shift
25 |   elif [[ $1 == "--dot" ]]
26 |   then
27 |     mkpdf=0
28 |     shift
29 |   else
30 |     usage
31 |   fi
32 | done
33 | 
34 | if [[ $# -ne 2 || !( $1 =~ ^[0-9]+$ ) ]]
35 | then
36 |   usage
37 | fi
38 | 
39 | kmer=$1
40 | shift
41 | 
42 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
43 | MCCORTEX="$DIR/bin/mccortex"
44 | CTX2GRAPHVIZ="$DIR/scripts/perl/mccortex-graph-to-graphviz.pl"
45 | if [[ !(-e $MCCORTEX) || !(-x $MCCORTEX) ]]
46 | then
47 |   echo "Did you compile McCortex? I cannot run `$MCCORTEX`"
48 |   exit -1
49 | fi
50 | 
51 | files=$(printf " --seq %s" $@; printf "\n")
52 | 
53 | if [[ $mkpdf == 1 ]]; then
54 |   $MCCORTEX $kmer build -q -k $kmer --sample seq2pdf $files - | \
55 |     $CTX2GRAPHVIZ -k $kmer $script_args - | \
56 |     dot -Tpdf
57 | else
58 |   $MCCORTEX $kmer build -q -k $kmer --sample seq2pdf $files - | \
59 |     $CTX2GRAPHVIZ -k $kmer $script_args -
60 | fi
61 | 


--------------------------------------------------------------------------------
/src/graph/db_unitig.h:
--------------------------------------------------------------------------------
 1 | #ifndef DB_UNITIG_H_
 2 | #define DB_UNITIG_H_
 3 | 
 4 | #include "cortex_types.h"
 5 | #include "db_graph.h"
 6 | #include "db_node.h"
 7 | 
 8 | // Orient unitig
 9 | // Once oriented, unitig has lowest possible kmerkey at the beginning,
10 | // oriented FORWARDs if possible
11 | void db_unitig_normalise(dBNode *nlist, size_t len, const dBGraph *db_graph);
12 | 
13 | // Extend a unitig, nlist[offset] and olist[offset] must already be set
14 | // Walk along nodes starting from node/or, storing the unitig in nlist/olist
15 | // Returns the number of nodes added, adds no more than `limit`
16 | // return false if out of space and limit > 0
17 | bool db_unitig_extend(dBNodeBuffer *nbuf, size_t limit,
18 |                       const dBGraph *db_graph);
19 | 
20 | // Fills with unitig that contains hkey
21 | // Does not reset nbuf
22 | void db_unitig_fetch(hkey_t node, dBNodeBuffer *nbuf, const dBGraph *db_graph);
23 | 
24 | // Count number of read starts using coverage data
25 | size_t db_unitig_read_starts(const Covg *covgs, size_t len);
26 | size_t db_unitig_covg_mean(const Covg *covgs, size_t len);
27 | 
28 | /**
29 |  * @param visited must be initialised to zero, will be dirty upon return
30 |  **/
31 | void db_unitigs_iterate(size_t nthreads, uint8_t *visited,
32 |                         const dBGraph *db_graph,
33 |                         void (*func)(dBNodeBuffer nbuf, size_t threadid, void *arg),
34 |                         void *arg);
35 | 
36 | #endif /* DB_UNITIG_H_ */
37 | 


--------------------------------------------------------------------------------
/results/klebsiella/kleb_pneumoniae/platypus/call-platypus.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | set -o xtrace
 5 | 
 6 | REF=../ref/GCF_000016305.1_ASM1630v1_genomic.fa
 7 | BAM=../remap/mapped/KlebPneu.bam
 8 | BAMRMDUP=../remap/mapped/KlebPneu.rmdup.bam
 9 | 
10 | CTXDIR=~/mccortex
11 | VCF_ADD_CONTIGS=$CTXDIR/libs/biogrok/vcf-add-contigs
12 | VCF_PASS=$CTXDIR/libs/biogrok/vcf-pass
13 | BCFTOOLS=${BCFTOOLS:-$CTXDIR/libs/bcftools/bcftools} # used below; unset would abort under set -u
14 | BGZIP=${BGZIP:-$CTXDIR/libs/htslib/bgzip}             # env value wins if already exported
15 | PLATDIR=~/bioinf/Platypus
16 | source $PLATDIR/prepare.sh
17 | python $PLATDIR/bin/Platypus.py callVariants --logFileName platypus.rmdup.log \
18 |                                              --output=platypus.rmdup.vcf \
19 |                                              --refFile=$REF --bamFiles=$BAMRMDUP >& platypus.rmdup.vcf.log
20 | 
21 | python $PLATDIR/bin/Platypus.py callVariants --logFileName platypus.assem.log \
22 |                                              --output=platypus.assem.vcf --assemble=1 \
23 |                                              --refFile=$REF --bamFiles=$BAMRMDUP >& platypus.assem.vcf.log
24 | 
25 | # Add contigs to header
26 | $VCF_ADD_CONTIGS <(dnacat --lengths $REF) KlebPneu_MGH_78578 platypus.rmdup.vcf | \
27 |   $BCFTOOLS norm --check-ref x -m -any --fasta-ref $REF --site-win 5000 | \
28 |   $BCFTOOLS norm --rm-dup any --do-not-normalize | \
29 |   $VCF_PASS > platypus.vcf
30 | $BGZIP platypus.vcf
31 | $BCFTOOLS index platypus.vcf.gz
32 | 
33 | # Analysis
34 | rm -rf mummer_isec mapping_truth cortex.k31.k61.{mapping,isec}.log
35 | ./analysis.sh >& analysis.log
36 | 


--------------------------------------------------------------------------------
/tests/sort/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL:=/bin/bash -euo pipefail
 2 | 
 3 | # Builds a graph from random sequence, then checks that `sort` on that graph
 4 | # and `build --sort` both list kmers in the order given by piping the unsorted
 5 | # graph's kmers through sort(1).
 6 | # Any kmer (K=) should work
 7 | 
 8 | K=51
 9 | CTXDIR=../..
10 | DNACAT=$(CTXDIR)/libs/seq_file/bin/dnacat
11 | # Binary name suffix: K rounded up to a multiple of 32, minus one (K=51 -> 63)
12 | MCCORTEX=$(shell echo $(CTXDIR)/bin/mccortex$$(( (($(K)+31)/32)*32 - 1 )))
13 | 
14 | UNSORTED=graph.k$(K).ctx
15 | SORTED_AFTER=build.then.sort.k$(K).ctx
16 | SORTED_BUILD=build.and.sort.k$(K).ctx
17 | KMERS_TXT=kmers.sorted.k$(K).txt
18 | 
19 | GRAPHS=seq.fa $(UNSORTED) $(SORTED_AFTER) $(SORTED_BUILD)
20 | MISC=$(KMERS_TXT) $(SORTED_AFTER).idx
21 | LOGS=$(addsuffix .log,$(GRAPHS) $(MISC))
22 | 
23 | all: title $(GRAPHS) $(MISC) check
24 | 
25 | title:
26 | 	@echo "-- Testing sort k=$(K) --"
27 | 
28 | clean:
29 | 	rm -rf $(GRAPHS) $(MISC) $(LOGS)
30 | 
31 | # Input sequence written by dnacat
32 | seq.fa:
33 | 	$(DNACAT) -F -n 100 > $@
34 | 
35 | # Unsorted graph
36 | $(UNSORTED): seq.fa
37 | 	$(MCCORTEX) build -k $(K) --sample Jimmy --seq seq.fa $@ > $@.log 2>&1
38 | 	$(MCCORTEX) check -q $@
39 | 
40 | # Sort an existing graph
41 | $(SORTED_AFTER): $(UNSORTED)
42 | 	$(MCCORTEX) sort -o $@ $(UNSORTED) > $@.log 2>&1
43 | 	$(MCCORTEX) check -q $@
44 | 
45 | # Sort while building
46 | $(SORTED_BUILD): seq.fa
47 | 	$(MCCORTEX) build -k $(K) --sort --sample Jimmy --seq seq.fa $@ > $@.log 2>&1
48 | 	$(MCCORTEX) check -q $@
49 | 
50 | %.ctx.idx: %.ctx
51 | 	$(MCCORTEX) index --out $@ --block-kmers 11 $< > $@.log 2>&1
52 | 
53 | # Expected order: kmers of the unsorted graph, sorted with sort(1)
54 | $(KMERS_TXT): $(UNSORTED)
55 | 	$(MCCORTEX) view -q --kmers $(UNSORTED) | sort > $@
56 | 
57 | check: $(KMERS_TXT) $(SORTED_AFTER) $(SORTED_BUILD)
58 | 	diff -q $(KMERS_TXT) <($(MCCORTEX) view -q -k $(SORTED_AFTER))
59 | 	diff -q $(KMERS_TXT) <($(MCCORTEX) view -q -k $(SORTED_BUILD))
60 | 
61 | .PHONY: all clean check title
62 | 


--------------------------------------------------------------------------------
/tests/vcfcov/calls2/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/bash -euo pipefail
 2 | 
 3 | #
 4 | # Test vcfcov with too many overlapping variants
 5 | #
 6 | # Test VCF of a SNP per base generated with:
 7 | #   ./fake-vcf.py ../ref/ref.fa > calls.vcf
 8 | #
 9 | # vcfcov is run with --high-mem and with --low-mem; both outputs must match
10 | # truth.cov.vcf, and the --high-mem log must report fewer than 50 for
11 | # 'max alleles in buffer'.
12 | #
13 | 
14 | K=21
15 | CTXDIR=../../..
16 | MCCORTEX=$(CTXDIR)/bin/mccortex $(K)
17 | VCFENTRIES=$(CTXDIR)/libs/biogrok/vcf-entries
18 | 
19 | REF=../ref/ref.fa
20 | 
21 | all: test
22 | 
23 | clean:
24 | 	rm -rf calls.cov.vcf* lowmem.cov.vcf* graph.k$(K).ctx *.log
25 | 
26 | # The log is written by the calls.cov.vcf recipe, so it has no recipe of its own
27 | calls.cov.vcf.log: calls.cov.vcf
28 | calls.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx
29 | 	$(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --max-nvars 4 --high-mem calls.vcf graph.k$(K).ctx >& $@.log
30 | 
31 | lowmem.cov.vcf: $(REF) calls.vcf graph.k$(K).ctx
32 | 	$(MCCORTEX) vcfcov -m 10M -o $@ -r $(REF) --max-nvars 4 --low-mem calls.vcf graph.k$(K).ctx >& $@.log
33 | 
34 | graph.k$(K).ctx: sample.fa
35 | 	$(MCCORTEX) build -m 10M -k $(K) --sample John --seq sample.fa $@ >& $@.log
36 | 
37 | # The buffer count is captured into a variable before it is compared: an empty
38 | # command substitution placed directly inside [[ ... -lt 50 ]] evaluates as 0,
39 | # so a log without the 'max alleles in buffer:' line used to pass the check.
40 | test: calls.cov.vcf lowmem.cov.vcf truth.cov.vcf calls.cov.vcf.log
41 | 	diff -q <($(VCFENTRIES) calls.cov.vcf) <($(VCFENTRIES) truth.cov.vcf)
42 | 	diff -q <($(VCFENTRIES) lowmem.cov.vcf) <($(VCFENTRIES) truth.cov.vcf)
43 | 	@echo "=> VCF files match."
44 | 	nalleles=`grep -o 'max alleles in buffer:.*' calls.cov.vcf.log | grep -o '[0-9][0-9]*'` && \
45 | 	  [[ -n "$$nalleles" && "$$nalleles" -lt 50 ]]
46 | 	@echo "=> Buffer kept below 50 VCF entries."
47 | 
48 | view: calls.cov.vcf truth.cov.vcf
49 | 	gzip -fcd calls.cov.vcf
50 | 	gzip -fcd truth.cov.vcf
51 | 
52 | .PHONY: all clean view test
53 | 


--------------------------------------------------------------------------------
/src/basic/seq_loading_stats.h:
--------------------------------------------------------------------------------
 1 | #ifndef SEQ_LOADING_STATS_H_
 2 | #define SEQ_LOADING_STATS_H_
 3 | 
 4 | // Standard headers for the types and functions used below, so that this
 5 | // header does not depend on what was included before it
 6 | #include <stddef.h> // size_t
 7 | #include <stdint.h> // uint64_t
 8 | #include <string.h> // memset(), used by seq_loading_stats_init()
 9 | 
10 | // Structure for statistics on loading sequence and cortex binary files
11 | typedef struct
12 | {
13 |   // num_se_reads includes good reads, bad reads and duplicates etc.
14 |   size_t num_se_reads, num_pe_reads;
15 |   size_t num_good_reads, num_bad_reads, num_dup_se_reads, num_dup_pe_pairs;
16 |   size_t total_bases_read, total_bases_loaded;
17 |   size_t contigs_parsed, num_kmers_parsed, num_kmers_loaded, num_kmers_novel;
18 |   // NOTE(review): presumably per-colour arrays of length ncols -- confirm in the .c file
19 |   uint64_t *col_nkmers, *col_sum_covgs;
20 |   size_t ncols; // max number of colours loaded
21 | } SeqLoadingStats;
22 | 
23 | // Compound literal with every counter set to zero and both pointers NULL
24 | #define SEQ_LOADING_STATS_INIT (SeqLoadingStats){ \
25 |   .num_se_reads     = 0, .num_pe_reads       = 0, \
26 |   .num_good_reads   = 0, .num_bad_reads      = 0, \
27 |   .num_dup_se_reads = 0, .num_dup_pe_pairs   = 0, \
28 |   .total_bases_read = 0, .total_bases_loaded = 0, \
29 |   .contigs_parsed   = 0, .num_kmers_parsed   = 0, \
30 |   .num_kmers_loaded = 0, .num_kmers_novel    = 0, \
31 |   .col_nkmers = NULL, .col_sum_covgs = NULL, \
32 |   .ncols = 0 \
33 | }
34 | 
35 | // Functions for dealing with file loading statistics
36 | 
37 | // Zero every byte of the SeqLoadingStats that `s` points to. The pointer
38 | // fields are overwritten, not freed.
39 | #define seq_loading_stats_init(s) memset((s), 0, sizeof(SeqLoadingStats))
40 | void seq_loading_stats_merge(SeqLoadingStats *dst, const SeqLoadingStats *src);
41 | 
42 | // @ht_num_kmers is the number of kmers loaded into the graph
43 | void seq_loading_stats_print(const SeqLoadingStats *stats, size_t ht_num_kmers);
44 | 
45 | #endif /* SEQ_LOADING_STATS_H_ */
46 | 


--------------------------------------------------------------------------------