├── .gitignore ├── .gitlab-ci.yml ├── .gitlab └── issue_templates │ └── bug.md ├── .pre-commit-config.yaml ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── development ├── Linux │ ├── extractor-template.sh │ └── install.sh ├── MacOSX │ ├── INSTALL │ ├── postinstall.sh │ └── variant_tools.pmdoc │ │ ├── 01variant-contents.xml │ │ ├── 01variant.xml │ │ └── index.xml ├── conda │ ├── build.sh │ └── meta.yaml ├── docker │ └── Dockerfile ├── docker_ci │ └── Dockerfile ├── manage_resource.py ├── md5_annoDB.py └── monitor.py ├── resources ├── annotation │ ├── CancerGeneCensus-20111215.ann │ ├── CancerGeneCensus-20120315.ann │ ├── CancerGeneCensus-20130711.ann │ ├── CancerGeneCensus-20170912.ann │ ├── CosmicCodingMuts-v61_260912.ann │ ├── CosmicCodingMuts-v67_20131024.ann │ ├── CosmicCodingMuts-v82_20170801.ann │ ├── CosmicMutantExport-v61_260912.ann │ ├── CosmicMutantExport-v67_241013.ann │ ├── CosmicMutantExport-v82_20170803.ann │ ├── CosmicNonCodingVariants-v61_260912.ann │ ├── CosmicNonCodingVariants-v67_241013.ann │ ├── CosmicNonCodingVariants-v82_20170801.ann │ ├── DGV-hg18_20130723.ann │ ├── DGV-hg19_20130723.ann │ ├── DGV-hg19_20160515.ann │ ├── DGV-hg19_20160831.ann │ ├── DGV-hg38_20160831.ann │ ├── ESP-6500SI-V2-SSA137.ann │ ├── EntrezGene-20131028.ann │ ├── EntrezGene-20170919.ann │ ├── EntrezGene2RefSeq-20131028.ann │ ├── EntrezGene2RefSeq-20170919.ann │ ├── ExAC-hg19_r0.2.ann │ ├── ExAC.ann │ ├── HGNC-20131029.ann │ ├── HGNC-20170920.ann │ ├── Illumina_NRCE-20130307.ann │ ├── LCR-hg19_20090320.ann │ ├── ccdsGene-hg19_20110909.ann │ ├── ccdsGene-hg19_20111206.ann │ ├── ccdsGene-hg19_20130904.ann │ ├── ccdsGene-hg38_20171008.ann │ ├── ccdsGene_exon-hg19_20110909.ann │ ├── ccdsGene_exon-hg19_20111206.ann │ ├── ccdsGene_exon-hg19_20130904.ann │ ├── ccdsGene_exon-hg38_20171008.ann │ ├── ccdsGene_exon_hg19-20111206.ann │ ├── ccdsGene_exon_hg38-20171008.ann │ ├── ccdsGene_hg19-20111206.ann │ ├── clinvar-20150804.ann │ ├── clinvar-20150929.ann │ ├── 
clinvar-20160104.ann │ ├── clinvar-20160107.ann │ ├── clinvar-20171002.ann │ ├── cytoBand-hg18_20111216.ann │ ├── cytoBand-hg19_20111216.ann │ ├── cytoBand-hg38_20140810.ann │ ├── dbNSFP-hg18_hg19_1.1_2.ann │ ├── dbNSFP-hg18_hg19_1_3.ann │ ├── dbNSFP-hg18_hg19_2_0.ann │ ├── dbNSFP-hg18_hg19_2_0b4.ann │ ├── dbNSFP-hg18_hg19_2_1.ann │ ├── dbNSFP-hg18_hg19_2_3.ann │ ├── dbNSFP-hg18_hg19_2_4.ann │ ├── dbNSFP-hg18_hg19_2_7.ann │ ├── dbNSFP-hg18_hg19_2_9.ann │ ├── dbNSFP-hg38_3_5a.ann │ ├── dbNSFP_gene-2_0.ann │ ├── dbNSFP_gene-2_1.ann │ ├── dbNSFP_gene-2_3.ann │ ├── dbNSFP_gene-2_4.ann │ ├── dbNSFP_gene-2_7.ann │ ├── dbNSFP_gene-3_5a.ann │ ├── dbNSFP_light-hg18_hg19_1.0_0.ann │ ├── dbNSFP_light-hg18_hg19_1_3.ann │ ├── dbSNP-hg18_129.ann │ ├── dbSNP-hg18_130.ann │ ├── dbSNP-hg19_131.ann │ ├── dbSNP-hg19_132.ann │ ├── dbSNP-hg19_135-1.ann │ ├── dbSNP-hg19_135.ann │ ├── dbSNP-hg19_137.ann │ ├── dbSNP-hg19_138.ann │ ├── dbSNP-hg38_143.ann │ ├── dbscSNV-hg19_20141120.ann │ ├── dbscSNV-hg38_20150412.ann │ ├── genomicSuperDups-hg19_20130626.ann │ ├── genomicSuperDups-hg38_20141018.ann │ ├── gwasCatalog-hg19_20111220.ann │ ├── gwasCatalog-hg19_20140112.ann │ ├── gwasCatalog-hg38_20171004.ann │ ├── hapmap_ASW_freq-hg18_20100817.ann │ ├── hapmap_CEU_freq-hg18_20100817.ann │ ├── hapmap_CHB_freq-hg18_20100817.ann │ ├── hapmap_CHD_freq-hg18_20100817.ann │ ├── hapmap_GIH_freq-hg18_20100817.ann │ ├── hapmap_JPT_freq-hg18_20100817.ann │ ├── hapmap_LWK_freq-hg18_20100817.ann │ ├── hapmap_MEX_freq-hg18_20100817.ann │ ├── hapmap_MKK_freq-hg18_20100817.ann │ ├── hapmap_TSI_freq-hg18_20100817.ann │ ├── hapmap_YRI_freq-hg18_20100817.ann │ ├── keggPathway-20110823.ann │ ├── knownGene-hg18_20110909.ann │ ├── knownGene-hg18_20121219.ann │ ├── knownGene-hg19_20110909.ann │ ├── knownGene-hg19_20121219.ann │ ├── knownGene-hg19_20130904.ann │ ├── knownGene-hg38_20160328.ann │ ├── knownGene_exon-hg18_20110909.ann │ ├── knownGene_exon-hg19_20110909.ann │ ├── knownGene_exon-hg19_20130904.ann │ ├── 
knownGene_exon-hg38_20160328.ann │ ├── phastCons-hg19_20110909.ann │ ├── phastCons-hg19_20130322.ann │ ├── phastCons-hg38_20150913.ann │ ├── phastConsElements-hg19_20130622.ann │ ├── phastConsElements-hg38_20150913.ann │ ├── refGene-hg18_20110909.ann │ ├── refGene-hg19_20110909.ann │ ├── refGene-hg19_20130904.ann │ ├── refGene-hg38_20170201.ann │ ├── refGene-hg38_20171008.ann │ ├── refGene-mm10_20141201.ann │ ├── refGene_exon-hg18_20110909.ann │ ├── refGene_exon-hg19_20110909.ann │ ├── refGene_exon-hg19_20130904.ann │ ├── refGene_exon-mm10_20141201.ann │ ├── refGene_exon-mm10_20171008.ann │ ├── thousandGenomes-hg19_v3_20101123.ann │ └── thousandGenomes-hg19_v5b_20130502.ann ├── format │ ├── ANNOVAR.fmt │ ├── ANNOVAR_exonic_variant_function.fmt │ ├── ANNOVAR_variant_function.fmt │ ├── CASAVA18_indels.fmt │ ├── CASAVA18_snps.fmt │ ├── CGA.fmt │ ├── MAF.fmt │ ├── basic.fmt │ ├── csv.fmt │ ├── map.fmt │ ├── pileup_indel.fmt │ ├── plink.fmt │ ├── polyphen2.fmt │ ├── rsname.fmt │ ├── tped.fmt │ ├── twoalleles.fmt │ └── vcf.fmt ├── pipeline │ ├── ANNOVAR.pipeline │ ├── DNASeq_tools.py │ ├── KING.pipeline │ ├── anno_utils.pipeline │ ├── bwa_gatk28_b37.pipeline │ ├── bwa_gatk28_hg19.pipeline │ ├── bwa_gatk33_b37.pipeline │ ├── bwa_gatk33_hg19.pipeline │ ├── illumina.pipeline │ ├── import_vcf.pipeline │ ├── mosaik_gatk23_align.pipeline │ ├── snpEff.pipeline │ └── transmission.pipeline └── simulation │ ├── Lineage.pipeline │ ├── Lineage.py │ ├── Peng2011_srv.pipeline │ ├── Peng2014_ex1.pipeline │ ├── Peng2014_ex2.pipeline │ └── VST_srv.py ├── setup.py ├── src ├── cgatools │ ├── core.hpp │ ├── reference │ │ ├── ChromosomeIdField.cpp │ │ ├── ChromosomeIdField.hpp │ │ ├── CompactDnaSequence.cpp │ │ ├── CompactDnaSequence.hpp │ │ ├── CrrFile.cpp │ │ ├── CrrFile.hpp │ │ ├── CrrFileWriter.cpp │ │ ├── CrrFileWriter.hpp │ │ ├── GeneDataStore.cpp │ │ ├── GeneDataStore.hpp │ │ ├── RangeAnnotationStore.hpp │ │ ├── RepeatMaskerStore.hpp │ │ └── range.hpp │ └── util │ │ ├── BaseUtil.cpp │ 
│ ├── BaseUtil.hpp │ │ ├── DelimitedFile.cpp │ │ ├── DelimitedFile.hpp │ │ ├── DelimitedLineParser.cpp │ │ ├── DelimitedLineParser.hpp │ │ ├── Exception.cpp │ │ ├── Exception.hpp │ │ ├── GenericHistogram.cpp │ │ ├── GenericHistogram.hpp │ │ ├── IndirectComparator.hpp │ │ ├── Md5.cpp │ │ ├── Md5.hpp │ │ ├── RangeIntersector.hpp │ │ ├── RangeSet.cpp │ │ ├── RangeSet.hpp │ │ ├── Streams.cpp │ │ ├── Streams.hpp │ │ ├── StringSet.cpp │ │ ├── StringSet.hpp │ │ ├── parse.cpp │ │ └── parse.hpp ├── code_style.cfg ├── hdf5-blosc │ ├── blosc_filter.c │ ├── blosc_filter.h │ ├── blosc_plugin.c │ └── blosc_plugin.h ├── libplinkio │ ├── COPYING │ ├── LICENSE │ ├── bed.c │ ├── bed.h │ ├── bed_header.c │ ├── bed_header.h │ ├── bim.c │ ├── bim.h │ ├── bim_parse.c │ ├── bim_parse.h │ ├── common.h │ ├── cplinkio.c │ ├── csv.h │ ├── fam.c │ ├── fam.h │ ├── fam_parse.c │ ├── fam_parse.h │ ├── file.c │ ├── file.h │ ├── libcsv.c │ ├── plinkio.c │ ├── plinkio.h │ ├── snp_lookup.h │ ├── snp_lookup_big.h │ ├── snp_lookup_little.h │ ├── snparray.c │ ├── snparray.h │ ├── status.h │ └── utarray.h ├── rext │ ├── MetaSKAT.PFF.VAT.R │ └── MetaSKAT.VAT.R ├── sqlite │ ├── py2 │ │ ├── cache.c │ │ ├── cache.h │ │ ├── connection.c │ │ ├── connection.h │ │ ├── cursor.c │ │ ├── cursor.h │ │ ├── microprotocols.c │ │ ├── microprotocols.h │ │ ├── module.c │ │ ├── module.h │ │ ├── prepare_protocol.c │ │ ├── prepare_protocol.h │ │ ├── row.c │ │ ├── row.h │ │ ├── sqlitecompat.h │ │ ├── statement.c │ │ ├── statement.h │ │ ├── util.c │ │ └── util.h │ ├── py3 │ │ ├── cache.c │ │ ├── cache.h │ │ ├── connection.c │ │ ├── connection.h │ │ ├── cursor.c │ │ ├── cursor.h │ │ ├── microprotocols.c │ │ ├── microprotocols.h │ │ ├── module.c │ │ ├── module.h │ │ ├── prepare_protocol.c │ │ ├── prepare_protocol.h │ │ ├── row.c │ │ ├── row.h │ │ ├── sqlitecompat.h │ │ ├── statement.c │ │ ├── statement.h │ │ ├── util.c │ │ └── util.h │ ├── shell.c │ ├── sqlite3.c │ ├── sqlite3.h │ ├── sqlite3ext.h │ └── vt_sqlite3_ext.cpp ├── 
swigpyrun.h ├── ucsc │ ├── inc │ │ ├── aliType.h │ │ ├── asParse.h │ │ ├── bPlusTree.h │ │ ├── bamFile.h │ │ ├── base64.h │ │ ├── basicBed.h │ │ ├── bbiFile.h │ │ ├── bigBed.h │ │ ├── bigWig.h │ │ ├── binRange.h │ │ ├── bits.h │ │ ├── bwgInternal.h │ │ ├── cheapcgi.h │ │ ├── cirTree.h │ │ ├── common.h │ │ ├── dlist.h │ │ ├── dnaseq.h │ │ ├── dnautil.h │ │ ├── dystring.h │ │ ├── errabort.h │ │ ├── filePath.h │ │ ├── fuzzyFind.h │ │ ├── gfxPoly.h │ │ ├── hash.h │ │ ├── hmmstats.h │ │ ├── htmshell.h │ │ ├── https.h │ │ ├── internet.h │ │ ├── kxTok.h │ │ ├── linefile.h │ │ ├── localmem.h │ │ ├── memalloc.h │ │ ├── memgfx.h │ │ ├── mime.h │ │ ├── net.h │ │ ├── obscure.h │ │ ├── options.h │ │ ├── pipeline.h │ │ ├── portable.h │ │ ├── psl.h │ │ ├── rangeTree.h │ │ ├── rbTree.h │ │ ├── regexHelper.h │ │ ├── sig.h │ │ ├── sqlList.h │ │ ├── sqlNum.h │ │ ├── tokenizer.h │ │ ├── udc.h │ │ ├── vcf.h │ │ ├── verbose.h │ │ └── zlibFace.h │ ├── lib │ │ ├── aliType.c │ │ ├── asParse.c │ │ ├── bPlusTree.c │ │ ├── bamFile.c │ │ ├── base64.c │ │ ├── basicBed.c │ │ ├── bbiRead.c │ │ ├── bigBed.c │ │ ├── binRange.c │ │ ├── bits.c │ │ ├── bwgQuery.c │ │ ├── cheapcgi.c │ │ ├── cirTree.c │ │ ├── common.c │ │ ├── dlist.c │ │ ├── dnautil.c │ │ ├── dystring.c │ │ ├── errabort.c │ │ ├── ffAli.c │ │ ├── filePath.c │ │ ├── hash.c │ │ ├── hmmstats.c │ │ ├── htmshell.c │ │ ├── https.c │ │ ├── intExp.c │ │ ├── internet.c │ │ ├── kxTok.c │ │ ├── linefile.c │ │ ├── localmem.c │ │ ├── memalloc.c │ │ ├── mime.c │ │ ├── net.c │ │ ├── obscure.c │ │ ├── osunix.c │ │ ├── pipeline.c │ │ ├── portimpl.c │ │ ├── portimpl.h │ │ ├── psl.c │ │ ├── rangeTree.c │ │ ├── rbTree.c │ │ ├── regexHelper.c │ │ ├── servBrcMcw.c │ │ ├── servCrunx.c │ │ ├── servcis.c │ │ ├── servcl.c │ │ ├── servmsII.c │ │ ├── servpws.c │ │ ├── sqlList.c │ │ ├── sqlNum.c │ │ ├── tokenizer.c │ │ ├── udc.c │ │ ├── vcf.c │ │ ├── verbose.c │ │ ├── wildcmp.c │ │ └── zlibFace.c │ ├── samtools │ │ ├── bam.c │ │ ├── bam.h │ │ ├── bam_aux.c │ │ ├── 
bam_import.c │ │ ├── bam_index.c │ │ ├── bam_pileup.c │ │ ├── bgzf.c │ │ ├── bgzf.h │ │ ├── faidx.c │ │ ├── faidx.h │ │ ├── kstring.h │ │ ├── razf.c │ │ ├── razf.h │ │ ├── sam.c │ │ ├── sam.h │ │ ├── sam_header.c │ │ └── sam_header.h │ └── tabix │ │ ├── bam_endian.h │ │ ├── bedidx.c │ │ ├── index.c │ │ ├── khash.h │ │ ├── knetfile.c │ │ ├── knetfile.h │ │ ├── kseq.h │ │ ├── ksort.h │ │ ├── kstring.c │ │ ├── kstring.h │ │ └── tabix.h └── variant_tools │ ├── __init__.py │ ├── _version.py │ ├── accessor.py │ ├── action.cpp │ ├── action.h │ ├── annotation.py │ ├── assoData.cpp │ ├── assoData.h │ ├── assoTests.h │ ├── assoTests.i │ ├── assoTests.py │ ├── assoTests_wrap.cpp │ ├── association.py │ ├── association_hdf5.py │ ├── cgatools.i │ ├── cgatools.py │ ├── cgatools_wrap.cpp │ ├── checking_asso_result.py │ ├── compare.py │ ├── exporter.py │ ├── exporter_reader.py │ ├── fisher2.c │ ├── fisher2.h │ ├── geno_store.py │ ├── genotypes.c │ ├── genotypes.h │ ├── importer.py │ ├── importer_allele_hdf5.py │ ├── io_vcf_read.pyx │ ├── liftOver.py │ ├── lm.cpp │ ├── lm.h │ ├── merge_sort_parallel.py │ ├── meta.py │ ├── phenotype.py │ ├── pipeline.py │ ├── plinkfile.py │ ├── plot.py │ ├── preprocessor.py │ ├── project.py │ ├── rtester.py │ ├── simulation.py │ ├── site_options.py │ ├── tester.py │ ├── text_reader.py │ ├── ucsctools.i │ ├── ucsctools.py │ ├── ucsctools_wrap.cpp │ ├── update.py │ ├── utils.cpp │ ├── utils.h │ ├── utils.py │ ├── variant.py │ ├── vt_sqlite3.py │ ├── vtools.py │ ├── vtools_association_cluster.lsf │ ├── vtools_association_cluster.pbs │ ├── vtools_report.py │ └── worker_zmq.py └── test ├── ann ├── testNSFP.DB.gz ├── testNSFP.ann ├── testNSFP.zip ├── testThousandGenomes.ann ├── testThousandGenomes.vcf.head └── testThousandGenomes.zip ├── fmt ├── basic_hg18.fmt ├── dbSNP_hg19validation.fmt ├── genotypes.fmt ├── missing_gen.fmt ├── multi_index.fmt ├── new_format.fmt └── randcol.fmt ├── output ├── CGA_variant.txt ├── assogrp1.txt ├── assogrp2.txt ├── 
assogrp3.txt ├── assogrp4.txt ├── assogrp5.txt ├── assogrp6.txt ├── assogrp7.txt ├── assogrp8.txt ├── assores1.txt ├── assores2.txt ├── assores3.txt ├── assores4.txt ├── assores5.txt ├── assores6.txt ├── assores7.txt ├── assores8.txt ├── assores_wss1.txt ├── assores_wss2.txt ├── assores_wss3.txt ├── assores_wss4.txt ├── assores_wss5.txt ├── assores_wss6.txt ├── assores_wss7.txt ├── assores_wss8.txt ├── evsVariantTest.txt ├── exclude_anno.txt ├── exclude_anno1.txt ├── exclude_select_anno.txt ├── exclude_sift.txt ├── genotype_variant_sample_output.txt ├── import_cga.txt ├── import_cga_phenotype.txt ├── import_csv.txt ├── import_customized.txt ├── import_genotype_1.txt ├── import_genotype_2.txt ├── import_mixed_build.txt ├── import_mpi_genotype.txt ├── import_mpi_genotypes.txt ├── import_mpi_multi_genotype.txt ├── import_mpi_multi_genotype_hdf5.txt ├── import_mpi_multi_genotypes.txt ├── import_mpi_multi_samples.txt ├── import_mpi_multi_variant.txt ├── import_mpi_samples.txt ├── import_mpi_variant.txt ├── import_multi_sample2_samples.txt ├── import_multi_sample2_samples_hdf5.txt ├── import_multi_sample2_variant.txt ├── import_multi_sample_samples.txt ├── import_multi_sample_samples_hdf5.txt ├── import_multi_sample_variant.txt ├── import_txt_1.txt ├── import_vcf_alt.txt ├── import_vcf_ref.txt ├── liftover.txt ├── liftover_cmp.txt ├── missing_gen.tped ├── phenotype_fields.txt ├── phenotype_import.txt ├── phenotype_phenotype_with_filename.txt ├── phenotype_phenotype_with_filename_field.txt ├── remove_field_after.txt ├── remove_field_before.txt ├── remove_genofield_after.txt ├── remove_genofield_after_hdf5.txt ├── remove_genofield_after_sqlite.txt ├── remove_genofield_before.txt ├── remove_genofield_before_hdf5.txt ├── remove_genofield_before_sqlite.txt ├── remove_phenotype.txt ├── remove_phenotype_output.txt ├── remove_phenotype_sqlite.txt ├── update_sum_stat.txt ├── use_field.txt ├── use_position.txt ├── vcf_assigned_sample_name_genotype.txt ├── 
vcf_multiple_sample_name.txt ├── vcf_multiple_samples_genotypes.txt └── vcf_single_sampleName_genotype.txt ├── phenotype ├── badphenotype1.txt ├── badphenotype2.txt ├── badphenotype3.txt ├── pheno_filename.txt └── phenotype.txt ├── plink ├── dat1.bed ├── dat1.bim └── dat1.fam ├── proj └── assoproj.tar.gz ├── run_tests.py ├── temp_test_import_hdf5.py ├── testUtils.py ├── test_admin.py ├── test_associate.py ├── test_avg_depth.py ├── test_compare.py ├── test_exclude.py ├── test_execute.py ├── test_export.py ├── test_func.py ├── test_import.py ├── test_init.py ├── test_liftover.py ├── test_output.py ├── test_phenotype.py ├── test_pipeline.pipeline ├── test_pipeline.py ├── test_remove.py ├── test_select.py ├── test_show.py ├── test_trans_ratio.py ├── test_update.py ├── test_use.py ├── txt ├── ANNOVAR.txt ├── CASAVA18_INDEL.txt ├── CASAVA18_SNP.txt ├── CGA.tsv.bz2 ├── annovar.txt.exonic_variant_function ├── assoc.dat ├── assoc.fmt ├── assoc.phen ├── complteGenomics.tsv.bz2 ├── dbSNP_hg19validation.txt ├── genotypes.txt ├── input.tsv ├── invalid.tsv ├── pileup.indel ├── sample_1_chr22.txt ├── sample_chr22.txt ├── test.csv └── variants.txt └── vcf ├── 500SAMP.vcf ├── CEU.vcf.gz ├── CEU.vcf.gz.tbi ├── CEU_dup.vcf.gz ├── EMPTY.vcf ├── SAMP1.vcf ├── SAMP2.vcf ├── SAMP3_complex_variants.vcf ├── SAMP4_complex_variants.vcf ├── V1.vcf ├── V2.vcf ├── V3.vcf ├── chromX.vcf.gz ├── compare.vcf ├── dup_geno.vcf ├── hdf5_test.h5 ├── hdf5_test.vcf ├── input.vcf ├── input_nogeno.vcf ├── missing_gen.vcf ├── missing_gen_hdf5.vcf ├── var_format.vcf └── with_wildtype.vcf /.gitignore: -------------------------------------------------------------------------------- 1 | src/hdf5-blosc/.genotypes.c.swp 2 | src/hdf5-blosc/builder 3 | src/hdf5-blosc/src 4 | src/variant_tools/blosc_filter.o 5 | boost_1_49_0/ 6 | src/boost_1_49_0/ 7 | src/zeromq-4.0.3/ 8 | test/.snapshot.info 9 | src/variant_tools/celery_main/ 10 | src/variant_tools/io_vcf_read.c 11 | src/variant_tools.egg-info/ 12 | test/*.log 13 | 
test/*.DB 14 | test/*.proj 15 | test/*.gz 16 | test/*.tfam 17 | test/*.vcf 18 | test/.vtools_cache/ 19 | test/10 20 | test/ann/testNSFP.DB 21 | test/parent/ 22 | test/tmp_*_genotypes.h5 23 | test/tmp_*_genotypes_multi_genes.h5 24 | test/ceu/ 25 | test/sam1/ 26 | build 27 | dist 28 | cgatools 29 | sqlite 30 | libplinkio 31 | ucsc 32 | *.swp 33 | build 34 | dist 35 | *.pyc 36 | *.log 37 | variant_tools/assoTests.py 38 | variant_tools/assoTests_wrap.cpp 39 | variant_tools/cgatools.py 40 | variant_tools/cgatools_wrap.cpp 41 | variant_tools/swigpyrun.h 42 | variant_tools/ucsctools.py 43 | variant_tools/ucsctools_wrap.cpp 44 | .DS_Store 45 | src/variant_tools/*.o 46 | .vscode/ 47 | -------------------------------------------------------------------------------- /.gitlab/issue_templates/bug.md: -------------------------------------------------------------------------------- 1 | # Issue template 2 | - [ ] What 3 | - [ ] How 4 | - [ ] When 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v2.4.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - id: flake8 12 | args: ["--ignore=E501,W504,W503, E128"] 13 | - repo: https://github.com/pre-commit/mirrors-yapf 14 | rev: '' 15 | hooks: 16 | - id: yapf 17 | args: [--style, "{based_on_style:chromium,indent_width:4}"] 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - linux 3 | # travis does not support python on osx yet (https://github.com/travis-ci/travis-ci/issues/4729) 4 | language: python 
5 | python: 6 | # - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | 10 | sudo: required 11 | services: 12 | - docker 13 | # before_install: 14 | # - sudo apt-get install swig zlibc zlib1g zlib1g-dev libblas-dev liblapack-dev 15 | # install: "python setup.py install" 16 | # before_script: cd test 17 | # script: 18 | # - python run_tests.py 19 | install: 20 | - docker pull junmahouston/vtools_test:v3 21 | - docker run -dt --name vtools_test junmahouston/vtools_test:v3 22 | - docker cp ./src vtools_test:/home/bpeng/VariantTools 23 | - docker cp ./test vtools_test:/home/bpeng/VariantTools 24 | - docker cp setup.py vtools_test:/home/bpeng/VariantTools 25 | - docker exec vtools_test bash -c "cd VariantTools && python setup.py install" 26 | script: 27 | - docker exec vtools_test bash -c "cd VariantTools/test && python run_tests.py" 28 | 29 | email: 30 | recipients: 31 | - junmahouston@gmail.com 32 | on_success: never 33 | on_failure: always 34 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # $File: MANIFEST.in $ 2 | # $LastChangedDate: 2011-06-16 20:10:41 -0500 (Thu, 16 Jun 2011) $ 3 | # $Rev: 4234 $ 4 | # 5 | # This file is part of variant_tools, a software application to annotate, 6 | # summarize, and filter variants for next-gen sequencing analysis. 7 | # Please visit http://varianttools.sourceforge.net for details. 8 | # 9 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 
15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program. If not, see http://www.gnu.org/licenses/. 23 | # 24 | 25 | include setup.py 26 | include README.md 27 | include LICENSE 28 | # libplinkio 29 | recursive-include src *.py *.c *.cpp *.h *.hpp *.i *.ipp *.pyx 30 | recursive-include test *.py 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI version](https://badge.fury.io/py/variant-tools.svg)](https://badge.fury.io/py/variant-tools) 2 | 3 | # Variant Tools 4 | 5 | A command line tool for the manipulation, annotation, and analysis of genetic variants 6 | from next-generation sequencing studies. 7 | 8 | # Installation 9 | 10 | If you are using a conda environment, you can install variant tools with the command 11 | 12 | ``` 13 | conda install variant_tools -c bioconda -c conda-forge 14 | ``` 15 | Option `-c conda-forge` is required to enforce the use of `conda-forge` version of dependencies (e.g. `boost-cpp`) over their counterparts in the base channel. 16 | 17 | Otherwise, you can try to install it through `pip` 18 | 19 | ``` 20 | pip install variant_tools 21 | ``` 22 | 23 | You will need to install 24 | 25 | * `libboost` 26 | * `gsl` 27 | * `numpy` 28 | * `Cython` 29 | * `hdf5` 30 | * `blosc` 31 | * A C++ compiler such as `gcc` 32 | 33 | which, in a conda environment, could be installed with the command 34 | 35 | ``` 36 | conda install -c conda-forge boost-cpp gsl numpy cython blosc hdf5 37 | ``` 38 | 39 | This method can be used if you download or clone the latest version 40 | of variant tools from this repository. 
41 | 42 | # Documentation 43 | 44 | Please refer to [Variant Tools documentation](https://vatlab.github.io/vat-docs/) for details. 45 | -------------------------------------------------------------------------------- /development/Linux/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | is_relative() { 4 | local path="$1" 5 | shift 6 | 7 | [ "${path:0:1}" != "/" ] 8 | return 9 | } 10 | install () { 11 | mkdir -p $2/bin 12 | rm -rf $2/lib/variant_tools &> /dev/null 13 | mkdir -p $2/lib/variant_tools 14 | cp -r $1/* $2/lib/variant_tools 15 | for cmd in vtools vtools_report; do 16 | rm -rf $2/bin/$cmd &> /dev/null 17 | ln -s $2/lib/variant_tools/$cmd $2/bin/$cmd 18 | done 19 | echo -e "Libraries are installed to $2/lib\nBinary files are installed to $2/bin\n" 20 | } 21 | main () { 22 | local fullpath="" 23 | echo "Enter installation directory for variant tools & variant association tools: " 24 | printf "\t [/usr/local] " 25 | read fullpath 26 | if [ -z $fullpath ]; then 27 | install $1 "/usr/local" 28 | else 29 | eval fullpath=$fullpath 30 | if is_relative $fullpath; then 31 | fullpath=$PWD/$fullpath 32 | fi 33 | install $1 $fullpath 34 | fi 35 | } 36 | main $@ 37 | -------------------------------------------------------------------------------- /development/MacOSX/INSTALL: -------------------------------------------------------------------------------- 1 | This disk contains 2 | 3 | * A README file. 4 | 5 | * A MacOS X installer that installs variant tools apps to system Applications 6 | directory and commands vtools and vtools_report to /usr/local/bin. It 7 | requires root privilege. 8 | 9 | * Two self-contained executables vtools and vtools_report that can be executed 10 | directly without installation. These commands start slower than the installed 11 | version and are not recommended for general use. 
12 | -------------------------------------------------------------------------------- /development/MacOSX/postinstall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # $File: postinstall.sh $ 4 | # $LastChangedDate: 2013-04-16 13:32:03 -0500 (Tue, 16 Apr 2013) $ 5 | # $Rev: 1825 $ 6 | # 7 | # This file is part of variant_tools, a software application to annotate, 8 | # summarize, and filter variants for next-gen sequencing analysis. 9 | # Please visit http://varianttools.sourceforge.net for details. 10 | # 11 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 12 | # 13 | # This program is free software: you can redistribute it and/or modify 14 | # it under the terms of the GNU General Public License as published by 15 | # the Free Software Foundation, either version 3 of the License, or 16 | # (at your option) any later version. 17 | # 18 | # This program is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | # GNU General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU General Public License 24 | # along with this program. If not, see http://www.gnu.org/licenses/. 
25 | # 26 | 27 | # install vtools and vtools_report to /usr/local/bin 28 | # 29 | if [ -f /usr/local/bin/vtools ] 30 | then 31 | /bin/rm -f /usr/local/bin/vtools 32 | fi 33 | 34 | if [ -f /usr/local/bin/vtools_report ] 35 | then 36 | /bin/rm -f /usr/local/bin/vtools_report 37 | fi 38 | 39 | /bin/ln -s /Applications/variant_tools/variant_tools.app/Contents/MacOS/vtools /usr/local/bin 40 | /bin/ln -s /Applications/variant_tools/variant_tools.app/Contents/MacOS/vtools_report /usr/local/bin 41 | 42 | exit 0 43 | -------------------------------------------------------------------------------- /development/MacOSX/variant_tools.pmdoc/01variant.xml: -------------------------------------------------------------------------------- 1 | variant_tools.vt.pkg1.0.6../../dist/variant_tools/ApplicationsinstallTo.pathversionparentidentifierinstallToincludeRoot../../development/MacOSX/postinstall.sh01variant-contents.xml/CVS$/\.svn$/\.cvsignore$/\.cvspass$/\.DS_Store$ 2 | -------------------------------------------------------------------------------- /development/conda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $PYTHON setup.py install 4 | 5 | # Add more build steps here, if they are necessary. 6 | 7 | # See 8 | # http://docs.continuum.io/conda/build.html 9 | # for a list of environment variables that are set during the build process. 
10 | -------------------------------------------------------------------------------- /development/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: variant_tools 3 | version: !!str 3.0.1 4 | 5 | source: 6 | fn: variant_tools-3.0.1.tar.gz 7 | url: https://pypi.python.org/packages/source/v/variant_tools/variant_tools-3.0.1.tar.gz 8 | # patches: 9 | # List any patch files here 10 | # - fix.patch 11 | 12 | # build: 13 | # preserve_egg_dir: True 14 | # entry_points: 15 | # Put any entry points (scripts to be generated automatically) here. The 16 | # syntax is module:function. For example 17 | # 18 | # - simupop = simupop:main 19 | # 20 | # Would create an entry point called simupop that calls simupop.main() 21 | 22 | 23 | # If this is a new build for the same version, increment the build 24 | # number. If you do not include this key, it defaults to 0. 25 | # number: 1 26 | 27 | requirements: 28 | build: 29 | - python # [py3k] 30 | - setuptools 31 | - numpy 32 | - cython 33 | - pyzmq 34 | 35 | run: 36 | - python # [py3k] 37 | - numpy 38 | 39 | test: 40 | # Python imports 41 | imports: 42 | - variant_tools 43 | 44 | # commands: 45 | # You can put test commands to be run here. Use this to test that the 46 | # entry points work. 47 | 48 | 49 | # You can also put a file called run_test.py in the recipe that will be run 50 | # at test time. 51 | 52 | # requires: 53 | # Put any additional test requirements here. 
For example 54 | # - nose 55 | 56 | about: 57 | home: http://varianttools.sourceforge.net 58 | license: GNU General Public License (GPL) 59 | summary: 'Integrated annotation and analysis of next gen sequencing data' 60 | 61 | # See 62 | # http://docs.continuum.io/conda/build.html for 63 | # more information about meta.yaml 64 | -------------------------------------------------------------------------------- /development/docker_ci/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Docker image for variant tools 3 | # 4 | FROM continuumio/miniconda3 5 | 6 | MAINTAINER Bo Peng 7 | 8 | RUN apt-get update 9 | RUN apt-get -y install swig gcc g++ build-essential bzip2 libbz2-dev libz-dev curl git vim libblas-dev liblapack-dev libcurl4-openssl-dev libssl-dev 10 | 11 | 12 | RUN conda update python 13 | RUN pip install numpy scipy tables cython 14 | 15 | RUN conda install -c conda-forge hdf5 blosc gsl libboost 16 | ENV LD_INCLUDE_PATH=/opt/conda/include/ 17 | 18 | 19 | WORKDIR /home/bpeng 20 | RUN git clone http://github.com/vatlab/VariantTools VariantTools 21 | 22 | WORKDIR /home/bpeng/VariantTools 23 | RUN git fetch 24 | RUN git checkout f74ee0c66e042f55d82c2a67d14c20e054e57597 25 | RUN python setup.py install 26 | 27 | ENV HOME /home/bpeng 28 | RUN mkdir /home/bpeng/temp 29 | 30 | # download hg19 reference genome and refGene database 31 | # WORKDIR /home/bpeng/temp 32 | RUN touch temp.vcf 33 | RUN vtools init test --build hg19 34 | RUN vtools import temp.vcf 35 | RUN vtools use refGene 36 | 37 | WORKDIR /home/bpeng 38 | RUN rm -rf temp 39 | 40 | RUN mkdir /home/bpeng/temp 41 | 42 | # download hg18 reference genome and refGene database 43 | WORKDIR /home/bpeng/temp 44 | RUN touch temp.vcf 45 | RUN vtools init test --build hg18 46 | RUN vtools import temp.vcf 47 | RUN vtools use refGene 48 | 49 | WORKDIR /home/bpeng 50 | RUN rm -rf temp 51 | -------------------------------------------------------------------------------- 
/resources/annotation/CancerGeneCensus-20111215.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | # 7 | # To rebuild this database using a new version of data 8 | # 1. download an excel file from Cancer Genome Project: http://www.sanger.ac.uk/genetics/CGP/Census/ 9 | # 2. save the data in a tab-delimited text file in filename CancerGeneCensus.txt. Remove the header. 10 | # 3. run vtools use CancerGeneCensus.ann --files CancerGeneCensus.txt 11 | 12 | [linked fields] 13 | *=GeneSymbol 14 | 15 | [data sources] 16 | description=Cancer Genome Project 17 | version=20111215 18 | encoding=ISO-8859-1 19 | anno_type=field 20 | direct_url=annoDB/CancerGeneCensus-20111215.DB.gz 21 | source_url= 22 | source_type=txt 23 | 24 | [GeneSymbol] 25 | index=1 26 | type=VARCHAR(255) 27 | 28 | [Name] 29 | index=2 30 | type=VARCHAR(255) 31 | 32 | [GeneID] 33 | index=3 34 | type=VARCHAR(255) 35 | 36 | [Chr] 37 | index=4 38 | type=VARCHAR(255) 39 | 40 | [ChrBand] 41 | index=5 42 | type=VARCHAR(255) 43 | 44 | [CancerSomaticMut] 45 | index=6 46 | type=VARCHAR(255) 47 | 48 | [CancerGermlineMut] 49 | index=7 50 | type=VARCHAR(255) 51 | 52 | [TumourTypesSomatic] 53 | index=8 54 | type=VARCHAR(255) 55 | 56 | [TumourTypesGermline] 57 | index=9 58 | type=VARCHAR(255) 59 | 60 | [CancerSyndrome] 61 | index=10 62 | type=VARCHAR(255) 63 | 64 | [TissueType] 65 | index=11 66 | type=VARCHAR(255) 67 | 68 | [CancerMolecularGenetics] 69 | index=12 70 | type=VARCHAR(255) 71 | 72 | [MutationType] 73 | index=13 74 | type=VARCHAR(255) 75 | 76 | [TranslocationPartner] 77 | index=14 78 | type=VARCHAR(255) 79 | 80 | [OtherGermlineMut] 81 | index=15 82 | type=VARCHAR(255) 83 | 84 | [OtherSyndromeOrDisease] 85 | index=16 86 | type=VARCHAR(255) 87 | 
-------------------------------------------------------------------------------- /resources/annotation/CosmicCodingMuts-v61_260912.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, pos, ref, alt 9 | 10 | [data sources] 11 | description=Cosmic coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358. 12 | version=v61_260912 13 | anno_type=variant 14 | direct_url=annoDB/CosmicCodingMuts-v61_260912.DB.gz 15 | source_url=ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCodingMuts_v61_260912.vcf.gz 16 | source_type=txt 17 | source_pattern= 18 | 19 | [chr] 20 | index=1 21 | type=VARCHAR(20) 22 | comment=Chromosome 23 | 24 | [pos] 25 | index=2 26 | type=INTEGER NOT NULL 27 | comment=1-based position 28 | 29 | [COSMIC_ID] 30 | index=3 31 | type=VARCHAR(48) 32 | comment=cosmic id of mutation 33 | 34 | [ref] 35 | index=4 36 | type=VARCHAR(255) 37 | comment=Reference allele, '-' for insertion. 38 | 39 | [alt] 40 | index=5 41 | adj=CheckSplit() 42 | type=VARCHAR(255) 43 | comment=Alternative allele, '-' for deletion. 
44 | 45 | [gene] 46 | index=8 47 | type=VARCHAR(255) 48 | adj=ExtractValue('GENE=', ';') 49 | comment=genename 50 | 51 | [strand] 52 | index=8 53 | adj=ExtractValue('STRAND=', ';') 54 | type=VARCHAR(255) 55 | comment=strand 56 | 57 | [CDS] 58 | index=8 59 | adj=ExtractValue('CDS=', ';') 60 | type=VARCHAR(255) 61 | comment=CDS annotation 62 | 63 | [AA] 64 | index=8 65 | adj=ExtractValue('AA=', ';') 66 | type=VARCHAR(255) 67 | comment=Peptide annotation 68 | 69 | [CNT] 70 | index=8 71 | adj=ExtractValue('CNT=', ';') 72 | type=INT 73 | comment=Number of samples with this mutation 74 | -------------------------------------------------------------------------------- /resources/annotation/CosmicCodingMuts-v67_20131024.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, pos, ref, alt 9 | 10 | [data sources] 11 | description=Cosmic coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358. 
12 | version=v67_20131024 13 | anno_type=variant 14 | source_url=ftp://ngs.sanger.ac.uk/production/cosmic/CosmicCodingMuts_v67_20131024.vcf.gz 15 | direct_url=annoDB/CosmicCodingMuts-v67_20131024.DB.gz ad998078fab6ee4c225ab438f75f2360 16 | source_type=txt 17 | source_pattern= 18 | 19 | [chr] 20 | index=1 21 | type=VARCHAR(20) 22 | comment=Chromosome 23 | 24 | [pos] 25 | index=2 26 | type=INTEGER NOT NULL 27 | comment=1-based position 28 | 29 | [COSMIC_ID] 30 | index=3 31 | type=VARCHAR(48) 32 | comment=cosmic id of mutation 33 | 34 | [ref] 35 | index=4 36 | type=VARCHAR(255) 37 | comment=Reference allele, '-' for insertion. 38 | 39 | [alt] 40 | index=5 41 | adj=CheckSplit() 42 | type=VARCHAR(255) 43 | comment=Alternative allele, '-' for deletion. 44 | 45 | [gene] 46 | index=8 47 | type=VARCHAR(255) 48 | adj=ExtractValue('GENE=', ';') 49 | comment=genename 50 | 51 | [strand] 52 | index=8 53 | adj=ExtractValue('STRAND=', ';') 54 | type=VARCHAR(255) 55 | comment=strand 56 | 57 | [CDS] 58 | index=8 59 | adj=ExtractValue('CDS=', ';') 60 | type=VARCHAR(255) 61 | comment=CDS annotation 62 | 63 | [AA] 64 | index=8 65 | adj=ExtractValue('AA=', ';') 66 | type=VARCHAR(255) 67 | comment=Peptide annotation 68 | 69 | [CNT] 70 | index=8 71 | adj=ExtractValue('CNT=', ';') 72 | type=INT 73 | comment=Number of samples with this mutation 74 | -------------------------------------------------------------------------------- /resources/annotation/CosmicNonCodingVariants-v61_260912.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, pos, ref, alt 9 | 10 | [data sources] 11 | description=Cosmic non-coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. 
The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358. 12 | version=v61_260912 13 | anno_type=variant 14 | direct_url=annoDB/CosmicNonCodingVariants-v61_260912.DB.gz 15 | source_url=ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicNonCodingVariants_v61_260912.vcf.gz 16 | source_type=txt 17 | source_pattern= 18 | 19 | [chr] 20 | index=1 21 | type=VARCHAR(20) 22 | comment=Chromosome 23 | 24 | [pos] 25 | index=2 26 | type=INTEGER NOT NULL 27 | comment=1-based position 28 | 29 | [COSMIC_ID] 30 | index=3 31 | type=VARCHAR(48) 32 | comment=cosmic id of mutation 33 | 34 | [ref] 35 | index=4 36 | type=VARCHAR(255) 37 | comment=Reference allele, '-' for insertion. 38 | 39 | [alt] 40 | index=5 41 | adj=CheckSplit() 42 | type=VARCHAR(255) 43 | comment=Alternative allele, '-' for deletion. 44 | 45 | [gene] 46 | index=8 47 | type=VARCHAR(255) 48 | adj=ExtractValue('GENE=', ';') 49 | comment=genename 50 | 51 | [strand] 52 | index=8 53 | adj=ExtractValue('STRAND=', ';') 54 | type=VARCHAR(255) 55 | comment=strand 56 | -------------------------------------------------------------------------------- /resources/annotation/CosmicNonCodingVariants-v67_241013.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, pos, ref, alt 9 | 10 | [data sources] 11 | description=Cosmic non-coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. 
The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358. 12 | version=v67_241013 13 | anno_type=variant 14 | source_url=ftp://ngs.sanger.ac.uk/production/cosmic/CosmicNonCodingVariants_v67_20131024.vcf.gz 15 | direct_url=annoDB/CosmicNonCodingVariants-v67_241013.DB.gz 07366d9d5ba0cd79e03893263d31b7ea 16 | source_type=txt 17 | source_pattern= 18 | 19 | [chr] 20 | index=1 21 | type=VARCHAR(20) 22 | comment=Chromosome 23 | 24 | [pos] 25 | index=2 26 | type=INTEGER NOT NULL 27 | comment=1-based position 28 | 29 | [COSMIC_ID] 30 | index=3 31 | type=VARCHAR(48) 32 | comment=cosmic id of mutation 33 | 34 | [ref] 35 | index=4 36 | type=VARCHAR(255) 37 | comment=Reference allele, '-' for insertion. 38 | 39 | [alt] 40 | index=5 41 | adj=CheckSplit() 42 | type=VARCHAR(255) 43 | comment=Alternative allele, '-' for deletion. 44 | 45 | [gene] 46 | index=8 47 | type=VARCHAR(255) 48 | adj=ExtractValue('GENE=', ';') 49 | comment=genename 50 | 51 | [strand] 52 | index=8 53 | adj=ExtractValue('STRAND=', ';') 54 | type=VARCHAR(255) 55 | comment=strand 56 | -------------------------------------------------------------------------------- /resources/annotation/DGV-hg18_20130723.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg18=chr,start,end 9 | 10 | [data sources] 11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation. 
12 | anno_type=range 13 | header=1 14 | version=hg18_20130723 15 | source_url=http://dgv.tcag.ca/dgv/docs/NCBI36_hg18_variants_2013-07-23.txt 16 | direct_url=annoDB/DGV-hg18_20130723.DB.gz 17 | source_type=txt 18 | delimiter="\t" 19 | 20 | [variantaccession] 21 | index=1 22 | type=VARCHAR(10) 23 | 24 | [chr] 25 | index=2 26 | type=VARCHAR(48) 27 | 28 | [start] 29 | index=3 30 | type=INT 31 | 32 | [end] 33 | index=4 34 | type=INT 35 | 36 | [varianttype] 37 | index=5 38 | type=VARCHAR(3) 39 | 40 | [variantsubtype] 41 | index=6 42 | type=VARCHAR(11) 43 | 44 | [reference] 45 | index=7 46 | type=VARCHAR(31) 47 | 48 | [pubmedid] 49 | index=8 50 | type=INT 51 | 52 | [method] 53 | index=9 54 | type=VARCHAR(67) 55 | 56 | [platform] 57 | index=10 58 | type=VARCHAR(181) 59 | 60 | [mergedvariants] 61 | index=11 62 | type=VARCHAR(255) 63 | 64 | [supportingvariants] 65 | index=12 66 | type=VARCHAR(1144) 67 | 68 | [mergedorsample] 69 | index=13 70 | type=VARCHAR(1) 71 | 72 | [frequency] 73 | index=14 74 | type=VARCHAR(255) 75 | 76 | [samplesize] 77 | index=15 78 | type=INT 79 | 80 | [observedgains] 81 | index=16 82 | type=INT 83 | 84 | [observedlosses] 85 | index=17 86 | type=INT 87 | 88 | [cohortdescription] 89 | index=18 90 | type=VARCHAR(69) 91 | 92 | [genes] 93 | index=19 94 | type=VARCHAR(412) 95 | 96 | [samples] 97 | index=20 98 | type=VARCHAR(951) 99 | -------------------------------------------------------------------------------- /resources/annotation/DGV-hg19_20130723.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr,start,end 9 | 10 | [data sources] 11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation. 
12 | anno_type=range 13 | header=1 14 | version=hg19_20130723 15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh37_hg19_variants_2013-07-23.txt 16 | direct_url=annoDB/DGV-hg19_20130723.DB.gz a9c4f9f23ce4595d9c33b0a499273d53 17 | source_type=txt 18 | delimiter="\t" 19 | 20 | [variantaccession] 21 | index=1 22 | type=VARCHAR(10) 23 | 24 | [chr] 25 | index=2 26 | type=VARCHAR(48) 27 | 28 | [start] 29 | index=3 30 | type=INT 31 | 32 | [end] 33 | index=4 34 | type=INT 35 | 36 | [varianttype] 37 | index=5 38 | type=VARCHAR(3) 39 | 40 | [variantsubtype] 41 | index=6 42 | type=VARCHAR(11) 43 | 44 | [reference] 45 | index=7 46 | type=VARCHAR(31) 47 | 48 | [pubmedid] 49 | index=8 50 | type=INT 51 | 52 | [method] 53 | index=9 54 | type=VARCHAR(67) 55 | 56 | [platform] 57 | index=10 58 | type=VARCHAR(181) 59 | 60 | [mergedvariants] 61 | index=11 62 | type=VARCHAR(255) 63 | 64 | [supportingvariants] 65 | index=12 66 | type=VARCHAR(1144) 67 | 68 | [mergedorsample] 69 | index=13 70 | type=VARCHAR(1) 71 | 72 | [frequency] 73 | index=14 74 | type=VARCHAR(255) 75 | 76 | [samplesize] 77 | index=15 78 | type=INT 79 | 80 | [observedgains] 81 | index=16 82 | type=INT 83 | 84 | [observedlosses] 85 | index=17 86 | type=INT 87 | 88 | [cohortdescription] 89 | index=18 90 | type=VARCHAR(69) 91 | 92 | [genes] 93 | index=19 94 | type=VARCHAR(412) 95 | 96 | [samples] 97 | index=20 98 | type=VARCHAR(951) 99 | -------------------------------------------------------------------------------- /resources/annotation/DGV-hg19_20160515.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) Man Chong Leong 2017 (henryleong@rice.edu) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg19=chr,start,end 9 | 10 | [data sources] 11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation. 12 | anno_type=range 13 | header=1 14 | version=hg19_20160515 15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh37_hg19_variants_2016-05-15.txt 16 | #direct_url=annoDB/DGV-hg19_20160515.DB.gz a9c4f9f23ce4595d9c33b0a499273d53 17 | source_type=txt 18 | delimiter="\t" 19 | 20 | [variantaccession] 21 | index=1 22 | type=VARCHAR(10) 23 | 24 | [chr] 25 | index=2 26 | type=VARCHAR(48) 27 | 28 | [start] 29 | index=3 30 | type=INT 31 | 32 | [end] 33 | index=4 34 | type=INT 35 | 36 | [varianttype] 37 | index=5 38 | type=VARCHAR(3) 39 | 40 | [variantsubtype] 41 | index=6 42 | type=VARCHAR(11) 43 | 44 | [reference] 45 | index=7 46 | type=VARCHAR(31) 47 | 48 | [pubmedid] 49 | index=8 50 | type=INT 51 | 52 | [method] 53 | index=9 54 | type=VARCHAR(67) 55 | 56 | [platform] 57 | index=10 58 | type=VARCHAR(181) 59 | 60 | [mergedvariants] 61 | index=11 62 | type=VARCHAR(255) 63 | 64 | [supportingvariants] 65 | index=12 66 | type=VARCHAR(1144) 67 | 68 | [mergedorsample] 69 | index=13 70 | type=VARCHAR(1) 71 | 72 | [frequency] 73 | index=14 74 | type=VARCHAR(255) 75 | 76 | [samplesize] 77 | index=15 78 | type=INT 79 | 80 | [observedgains] 81 | index=16 82 | type=INT 83 | 84 | [observedlosses] 85 | index=17 86 | type=INT 87 | 88 | [cohortdescription] 89 | index=18 90 | type=VARCHAR(69) 91 | 92 | [genes] 93 | index=19 94 | type=VARCHAR(412) 95 | 96 | [samples] 97 | index=20 98 | type=VARCHAR(951) 99 | -------------------------------------------------------------------------------- /resources/annotation/DGV-hg19_20160831.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) Man Chong Leong 2017 (henryleong@rice.edu) 2 | # Distributed under GPL. 
see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg38=chr,start,end 9 | 10 | [data sources] 11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation. 12 | anno_type=range 13 | header=1 14 | version=hg38_20160831 15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh38_hg38_variants_2016-08-31.txt 16 | #direct_url=annoDB/DGV-hg38_20160515.DB.gz a9c4f9f23ce4595d9c33b0a499273d53 17 | source_type=txt 18 | delimiter="\t" 19 | 20 | [variantaccession] 21 | index=1 22 | type=VARCHAR(10) 23 | 24 | [chr] 25 | index=2 26 | type=VARCHAR(48) 27 | 28 | [start] 29 | index=3 30 | type=INT 31 | 32 | [end] 33 | index=4 34 | type=INT 35 | 36 | [varianttype] 37 | index=5 38 | type=VARCHAR(3) 39 | 40 | [variantsubtype] 41 | index=6 42 | type=VARCHAR(11) 43 | 44 | [reference] 45 | index=7 46 | type=VARCHAR(31) 47 | 48 | [pubmedid] 49 | index=8 50 | type=INT 51 | 52 | [method] 53 | index=9 54 | type=VARCHAR(67) 55 | 56 | [platform] 57 | index=10 58 | type=VARCHAR(181) 59 | 60 | [mergedvariants] 61 | index=11 62 | type=VARCHAR(255) 63 | 64 | [supportingvariants] 65 | index=12 66 | type=VARCHAR(1144) 67 | 68 | [mergedorsample] 69 | index=13 70 | type=VARCHAR(1) 71 | 72 | [frequency] 73 | index=14 74 | type=VARCHAR(255) 75 | 76 | [samplesize] 77 | index=15 78 | type=INT 79 | 80 | [observedgains] 81 | index=16 82 | type=INT 83 | 84 | [observedlosses] 85 | index=17 86 | type=INT 87 | 88 | [cohortdescription] 89 | index=18 90 | type=VARCHAR(69) 91 | 92 | [genes] 93 | index=19 94 | type=VARCHAR(412) 95 | 96 | [samples] 97 | index=20 98 | type=VARCHAR(951) 99 | -------------------------------------------------------------------------------- /resources/annotation/DGV-hg38_20160831.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) Man Chong Leong 2017 (henryleong@rice.edu) 2 | # 
Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg38=chr,start,end 9 | 10 | [data sources] 11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation. 12 | anno_type=range 13 | header=1 14 | version=hg38_20160831 15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh38_hg38_variants_2016-08-31.txt 16 | direct_url=annoDB/DGV-hg38_20160831.DB.gz b2183622dfc3cbbcf7d8640893dbf9fd 17 | source_type=txt 18 | delimiter="\t" 19 | 20 | [variantaccession] 21 | index=1 22 | type=VARCHAR(10) 23 | 24 | [chr] 25 | index=2 26 | type=VARCHAR(48) 27 | 28 | [start] 29 | index=3 30 | type=INT 31 | 32 | [end] 33 | index=4 34 | type=INT 35 | 36 | [varianttype] 37 | index=5 38 | type=VARCHAR(3) 39 | 40 | [variantsubtype] 41 | index=6 42 | type=VARCHAR(11) 43 | 44 | [reference] 45 | index=7 46 | type=VARCHAR(31) 47 | 48 | [pubmedid] 49 | index=8 50 | type=INT 51 | 52 | [method] 53 | index=9 54 | type=VARCHAR(67) 55 | 56 | [platform] 57 | index=10 58 | type=VARCHAR(181) 59 | 60 | [mergedvariants] 61 | index=11 62 | type=VARCHAR(255) 63 | 64 | [supportingvariants] 65 | index=12 66 | type=VARCHAR(1144) 67 | 68 | [mergedorsample] 69 | index=13 70 | type=VARCHAR(1) 71 | 72 | [frequency] 73 | index=14 74 | type=VARCHAR(255) 75 | 76 | [samplesize] 77 | index=15 78 | type=INT 79 | 80 | [observedgains] 81 | index=16 82 | type=INT NULL 83 | adj=Nullify(' ') 84 | 85 | [observedlosses] 86 | index=17 87 | type=INT NULL 88 | adj=Nullify(' ') 89 | 90 | [cohortdescription] 91 | index=18 92 | type=VARCHAR(69) 93 | 94 | [genes] 95 | index=19 96 | type=VARCHAR(412) 97 | 98 | [samples] 99 | index=20 100 | type=VARCHAR(951) 101 | -------------------------------------------------------------------------------- /resources/annotation/Illumina_NRCE-20130307.ann: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=Chromosome,Start,End 9 | 10 | [data sources] 11 | anno_type=range 12 | description=This annotation database contains expanded exome targeted regions covered by 13 | the Nextera Rapid Capture Expanded platform from Illumina. 14 | version=20130307 15 | source_url=http://supportres.illumina.com/documents/documentation/chemistry_documentation/samplepreps_nextera/nexterarapidcapture/nexterarapidcapture_expandedexome_targetedregions.txt 16 | direct_url=annoDB/Illumina_NRCE-20130307.DB.gz b6480f4d89cf763924a7d22207c89057 17 | header=7 18 | source_type=txt 19 | 20 | [Name] 21 | index=1 22 | type=VARCHAR(48) 23 | comment=Name of region 24 | 25 | [Chromosome] 26 | index=2 27 | adj=RemoveLeading('chr') 28 | type=VARCHAR(20) 29 | 30 | [Start] 31 | index=3 32 | type=INTEGER 33 | adj=IncreaseBy(1) 34 | comment=Start position of the targeted region 35 | 36 | [End] 37 | index=4 38 | type=INTEGER 39 | comment=End position of the targeted region 40 | 41 | # the source file has two additional columns Probe Length and Downstream probe length, but 42 | # they are all zero as far as I can tell. 43 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene-hg19_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg19=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=CCDS Genes 13 | version=hg19_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 15 | direct_url=annoDB/ccdsGene-hg19_20110909.DB.gz 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name (usually a CCDS transcript ID) 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=5 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=6 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=7 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=8 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=9 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | 60 | [score] 61 | index=12 62 | type=INTEGER NULL 63 | comment=Score 64 | 65 | [name2] 66 | index=13 67 | type=VARCHAR(255) 68 | comment=Alternate name 69 | 70 | [cdsStartStat] 71 | index=14 72 | type=VARCHAR(10) 73 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 74 | 75 | [cdsEndStat] 76 | index=15 77 | type=VARCHAR(10) 78 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 79 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene-hg19_20111206.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg19=chr, cdsStart, cdsEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=CCDS Genes 13 | version=hg19_20111206 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 15 | direct_url=annoDB/ccdsGene-hg19_20111206.DB.gz 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name (usually a CCDS transcript ID) 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [cdsStart] 34 | index=7 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Coding region start 38 | 39 | [cdsEnd] 40 | index=8 41 | type=INTEGER 42 | comment=Coding region end 43 | 44 | [exonCount] 45 | index=9 46 | type=INTEGER NULL 47 | comment=Number of exons 48 | 49 | [score] 50 | index=12 51 | type=INTEGER NULL 52 | comment=Score 53 | 54 | [name2] 55 | index=13 56 | type=VARCHAR(255) 57 | comment=Alternate name 58 | 59 | [cdsStartStat] 60 | index=14 61 | type=VARCHAR(10) 62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 63 | 64 | [cdsEndStat] 65 | index=15 66 | type=VARCHAR(10) 67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 68 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene-hg19_20130904.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, cdsStart, cdsEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=High-confidence human gene annotations from the Consensus Coding Sequence (CCDS) project. 
13 | version=hg19_20130904 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 15 | direct_url=annoDB/ccdsGene-hg19_20130904.DB.gz 50f2d2e271c7c43beba1b2175ddf62a8 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name (usually a CCDS transcript ID) 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [cdsStart] 34 | index=7 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Coding region start 38 | 39 | [cdsEnd] 40 | index=8 41 | type=INTEGER 42 | comment=Coding region end 43 | 44 | [exonCount] 45 | index=9 46 | type=INTEGER NULL 47 | comment=Number of exons 48 | 49 | [score] 50 | index=12 51 | type=INTEGER NULL 52 | comment=Score 53 | 54 | [name2] 55 | index=13 56 | type=VARCHAR(255) 57 | comment=Alternate name 58 | 59 | [cdsStartStat] 60 | index=14 61 | type=VARCHAR(10) 62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 63 | 64 | [cdsEndStat] 65 | index=15 66 | type=VARCHAR(10) 67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 68 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene-hg38_20171008.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg38=chr, cdsStart, cdsEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=High-confidence human gene annotations from the Consensus Coding Sequence (CCDS) project. 
13 | version=hg38_20171008 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/ccdsGene.txt.gz 15 | direct_url=annoDB/ccdsGene-hg38_20171008.DB.gz 56366edb79a9c2ccc73036ce2a3146e7 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name (usually a CCDS transcript ID) 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [cdsStart] 34 | index=7 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Coding region start 38 | 39 | [cdsEnd] 40 | index=8 41 | type=INTEGER 42 | comment=Coding region end 43 | 44 | [exonCount] 45 | index=9 46 | type=INTEGER NULL 47 | comment=Number of exons 48 | 49 | [score] 50 | index=12 51 | type=INTEGER NULL 52 | comment=Score 53 | 54 | [name2] 55 | index=13 56 | type=VARCHAR(255) 57 | comment=Alternate name 58 | 59 | [cdsStartStat] 60 | index=14 61 | type=VARCHAR(10) 62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 63 | 64 | [cdsEndStat] 65 | index=15 66 | type=VARCHAR(10) 67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 68 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene_exon-hg19_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | 8 | [linked fields] 9 | hg19=chr, exon_start, exon_end 10 | 11 | [data sources] 12 | anno_type=range 13 | description=CCDS exons 14 | version=hg19_20110909 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 16 | direct_url=annoDB/ccdsGene_exon-hg19_20110909.DB.gz 17 | source_type=txt 18 | 19 | [name] 20 | index=2 21 | type=VARCHAR(255) 22 | comment=CCDS gene name 23 | 24 | [chr] 25 | index=3 26 | adj=RemoveLeading('chr') 27 | type=VARCHAR(20) 28 | 29 | [strand] 30 | index=4 31 | type=CHAR(1) NULL 32 | comment=which DNA strand contains the observed alleles 33 | 34 | [txStart] 35 | index=5 36 | type=INTEGER 37 | adj=IncreaseBy(1) 38 | comment=Transcription start position 39 | 40 | [txEnd] 41 | index=6 42 | type=INTEGER 43 | comment=Transcription end position 44 | 45 | [cdsStart] 46 | index=7 47 | type=INTEGER 48 | adj=IncreaseBy(1) 49 | comment=Coding region start 50 | 51 | [cdsEnd] 52 | index=8 53 | type=INTEGER 54 | comment=Coding region end 55 | 56 | [exonCount] 57 | index=9 58 | type=INTEGER NULL 59 | comment=Number of exons 60 | 61 | [exon_start] 62 | index=10 63 | adj=SplitField(','), IncreaseBy(1) 64 | type=INTEGER NOT NULL 65 | comment=exon start position 66 | 67 | [exon_end] 68 | index=11 69 | adj=SplitField(',') 70 | type=INTEGER NOT NULL 71 | comment=exon end position 72 | 73 | [score] 74 | index=12 75 | type=INTEGER NULL 76 | comment=Score 77 | 78 | [name2] 79 | index=13 80 | type=VARCHAR(255) 81 | comment=Alternative name 82 | 83 | [cdsStartStat] 84 | index=14 85 | type=VARCHAR(10) 86 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 87 | 88 | [cdsEndStat] 89 | index=15 90 | type=VARCHAR(10) 91 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 92 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene_exon-hg19_20111206.ann: -------------------------------------------------------------------------------- 1 | # 
Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | 8 | [linked fields] 9 | hg19=chr, exon_start, exon_end 10 | 11 | [data sources] 12 | anno_type=range 13 | description=CCDS exons 14 | version=hg19_20111206 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 16 | direct_url=annoDB/ccdsGene_exon-hg19_20111206.DB.gz 17 | source_type=txt 18 | 19 | [name] 20 | index=2 21 | type=VARCHAR(255) 22 | comment=CCDS gene name 23 | 24 | [chr] 25 | index=3 26 | adj=RemoveLeading('chr') 27 | type=VARCHAR(20) 28 | 29 | [strand] 30 | index=4 31 | type=CHAR(1) NULL 32 | comment=which DNA strand contains the observed alleles 33 | 34 | [cdsStart] 35 | index=7 36 | type=INTEGER 37 | adj=IncreaseBy(1) 38 | comment=Coding region start 39 | 40 | [cdsEnd] 41 | index=8 42 | type=INTEGER 43 | comment=Coding region end 44 | 45 | [exonCount] 46 | index=9 47 | type=INTEGER NULL 48 | comment=Number of exons 49 | 50 | [exon_start] 51 | index=10 52 | adj=SplitField(','), IncreaseBy(1) 53 | type=INTEGER NOT NULL 54 | comment=exon start position 55 | 56 | [exon_end] 57 | index=11 58 | adj=SplitField(',') 59 | type=INTEGER NOT NULL 60 | comment=exon end position 61 | 62 | [score] 63 | index=12 64 | type=INTEGER NULL 65 | comment=Score 66 | 67 | [name2] 68 | index=13 69 | type=VARCHAR(255) 70 | comment=Alternative name 71 | 72 | [cdsStartStat] 73 | index=14 74 | type=VARCHAR(10) 75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 76 | 77 | [cdsEndStat] 78 | index=15 79 | type=VARCHAR(10) 80 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 81 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene_exon-hg19_20130904.ann: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | 8 | [linked fields] 9 | hg19=chr, exon_start, exon_end 10 | 11 | [data sources] 12 | anno_type=range 13 | description=High-confidence human gene annotations from the Consensus Coding Sequence (CCDS) project. This database contains all exon regions of the CCDS genes. 14 | version=hg19_20130904 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 16 | direct_url=annoDB/ccdsGene_exon-hg19_20130904.DB.gz 0e903b09c1c2bcd9f636d9477874dd82 17 | source_type=txt 18 | 19 | [name] 20 | index=2 21 | type=VARCHAR(255) 22 | comment=CCDS gene name 23 | 24 | [chr] 25 | index=3 26 | adj=RemoveLeading('chr') 27 | type=VARCHAR(20) 28 | 29 | [strand] 30 | index=4 31 | type=CHAR(1) NULL 32 | comment=which DNA strand contains the observed alleles 33 | 34 | [cdsStart] 35 | index=7 36 | type=INTEGER 37 | adj=IncreaseBy(1) 38 | comment=Coding region start 39 | 40 | [cdsEnd] 41 | index=8 42 | type=INTEGER 43 | comment=Coding region end 44 | 45 | [exonCount] 46 | index=9 47 | type=INTEGER NULL 48 | comment=Number of exons 49 | 50 | [exon_start] 51 | index=10 52 | adj=SplitField(','), IncreaseBy(1) 53 | type=INTEGER NOT NULL 54 | comment=exon start position 55 | 56 | [exon_end] 57 | index=11 58 | adj=SplitField(',') 59 | type=INTEGER NOT NULL 60 | comment=exon end position 61 | 62 | [score] 63 | index=12 64 | type=INTEGER NULL 65 | comment=Score 66 | 67 | [name2] 68 | index=13 69 | type=VARCHAR(255) 70 | comment=Alternative name 71 | 72 | [cdsStartStat] 73 | index=14 74 | type=VARCHAR(10) 75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 76 | 77 | [cdsEndStat] 78 | index=15 79 | type=VARCHAR(10) 80 | comment=cds end stat, can 
be 'none', 'unk', 'incmpl', and 'cmpl' 81 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene_exon-hg38_20171008.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | 8 | [linked fields] 9 | hg38=chr, exon_start, exon_end 10 | 11 | [data sources] 12 | anno_type=range 13 | description=CCDS exons 14 | version=hg38_20171008 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/ccdsGene.txt.gz 16 | direct_url=annoDB/ccdsGene_exon-hg38_20171008.DB.gz 293920c679221903b3e69031256d1432 17 | source_type=txt 18 | 19 | [name] 20 | index=2 21 | type=VARCHAR(255) 22 | comment=CCDS gene name 23 | 24 | [chr] 25 | index=3 26 | adj=RemoveLeading('chr') 27 | type=VARCHAR(20) 28 | 29 | [strand] 30 | index=4 31 | type=CHAR(1) NULL 32 | comment=which DNA strand contains the observed alleles 33 | 34 | [cdsStart] 35 | index=7 36 | type=INTEGER 37 | adj=IncreaseBy(1) 38 | comment=Coding region start 39 | 40 | [cdsEnd] 41 | index=8 42 | type=INTEGER 43 | comment=Coding region end 44 | 45 | [exonCount] 46 | index=9 47 | type=INTEGER NULL 48 | comment=Number of exons 49 | 50 | [exon_start] 51 | index=10 52 | adj=SplitField(','), IncreaseBy(1) 53 | type=INTEGER NOT NULL 54 | comment=exon start position 55 | 56 | [exon_end] 57 | index=11 58 | adj=SplitField(',') 59 | type=INTEGER NOT NULL 60 | comment=exon end position 61 | 62 | [score] 63 | index=12 64 | type=INTEGER NULL 65 | comment=Score 66 | 67 | [name2] 68 | index=13 69 | type=VARCHAR(255) 70 | comment=Alternative name 71 | 72 | [cdsStartStat] 73 | index=14 74 | type=VARCHAR(10) 75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 76 | 77 | [cdsEndStat] 78 | index=15 79 | 
type=VARCHAR(10) 80 | comment=cds end stat, can be 'non', 'unk', 'incompl', and 'cmp1' 81 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene_exon_hg19-20111206.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | 8 | [linked fields] 9 | hg19=chr, exon_start, exon_end 10 | 11 | [data sources] 12 | anno_type=range 13 | description=CCDS exons 14 | version=hg19_20111206 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 16 | direct_url=annoDB/ccdsGene_exon-hg19_20111206.DB.gz 17 | source_type=txt 18 | 19 | [name] 20 | index=2 21 | type=VARCHAR(255) 22 | comment=CCDS gene name 23 | 24 | [chr] 25 | index=3 26 | adj=RemoveLeading('chr') 27 | type=VARCHAR(20) 28 | 29 | [strand] 30 | index=4 31 | type=CHAR(1) NULL 32 | comment=which DNA strand contains the observed alleles 33 | 34 | [cdsStart] 35 | index=7 36 | type=INTEGER 37 | adj=IncreaseBy(1) 38 | comment=Coding region start 39 | 40 | [cdsEnd] 41 | index=8 42 | type=INTEGER 43 | comment=Coding region end 44 | 45 | [exonCount] 46 | index=9 47 | type=INTEGER NULL 48 | comment=Number of exons 49 | 50 | [exon_start] 51 | index=10 52 | adj=SplitField(','), IncreaseBy(1) 53 | type=INTEGER NOT NULL 54 | comment=exon start position 55 | 56 | [exon_end] 57 | index=11 58 | adj=SplitField(',') 59 | type=INTEGER NOT NULL 60 | comment=exon end position 61 | 62 | [score] 63 | index=12 64 | type=INTEGER NULL 65 | comment=Score 66 | 67 | [name2] 68 | index=13 69 | type=VARCHAR(255) 70 | comment=Alternative name 71 | 72 | [cdsStartStat] 73 | index=14 74 | type=VARCHAR(10) 75 | comment=cds start stat, can be 'non', 'unk', 'incompl', and 'cmp1' 76 | 77 | [cdsEndStat] 78 | index=15 79 | 
type=VARCHAR(10) 80 | comment=cds end stat, can be 'non', 'unk', 'incompl', and 'cmp1' 81 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene_exon_hg38-20171008.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | 8 | [linked fields] 9 | hg38=chr, exon_start, exon_end 10 | 11 | [data sources] 12 | anno_type=range 13 | description=CCDS exons 14 | version=hg38_20171008 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/ccdsGene.txt.gz 16 | #direct_url=annoDB/ccdsGene_exon-hg19_20111206.DB.gz 17 | source_type=txt 18 | 19 | [name] 20 | index=2 21 | type=VARCHAR(255) 22 | comment=CCDS gene name 23 | 24 | [chr] 25 | index=3 26 | adj=RemoveLeading('chr') 27 | type=VARCHAR(20) 28 | 29 | [strand] 30 | index=4 31 | type=CHAR(1) NULL 32 | comment=which DNA strand contains the observed alleles 33 | 34 | [cdsStart] 35 | index=7 36 | type=INTEGER 37 | adj=IncreaseBy(1) 38 | comment=Coding region start 39 | 40 | [cdsEnd] 41 | index=8 42 | type=INTEGER 43 | comment=Coding region end 44 | 45 | [exonCount] 46 | index=9 47 | type=INTEGER NULL 48 | comment=Number of exons 49 | 50 | [exon_start] 51 | index=10 52 | adj=SplitField(','), IncreaseBy(1) 53 | type=INTEGER NOT NULL 54 | comment=exon start position 55 | 56 | [exon_end] 57 | index=11 58 | adj=SplitField(',') 59 | type=INTEGER NOT NULL 60 | comment=exon end position 61 | 62 | [score] 63 | index=12 64 | type=INTEGER NULL 65 | comment=Score 66 | 67 | [name2] 68 | index=13 69 | type=VARCHAR(255) 70 | comment=Alternative name 71 | 72 | [cdsStartStat] 73 | index=14 74 | type=VARCHAR(10) 75 | comment=cds start stat, can be 'non', 'unk', 'incompl', and 'cmp1' 76 | 77 | [cdsEndStat] 78 | 
index=15 79 | type=VARCHAR(10) 80 | comment=cds end stat, can be 'non', 'unk', 'incompl', and 'cmp1' 81 | -------------------------------------------------------------------------------- /resources/annotation/ccdsGene_hg19-20111206.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see http://www.gnu.org/licenses/ 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, cdsStart, cdsEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=CCDS Genes 13 | version=hg19_20111206 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz 15 | direct_url=annoDB/ccdsGene-hg19_20111206.DB.gz 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name (usually a CCDS transcript ID) 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [cdsStart] 34 | index=7 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Coding region start 38 | 39 | [cdsEnd] 40 | index=8 41 | type=INTEGER 42 | comment=Coding region end 43 | 44 | [exonCount] 45 | index=9 46 | type=INTEGER NULL 47 | comment=Number of exons 48 | 49 | [score] 50 | index=12 51 | type=INTEGER NULL 52 | comment=Score 53 | 54 | [name2] 55 | index=13 56 | type=VARCHAR(255) 57 | comment=Alternate name 58 | 59 | [cdsStartStat] 60 | index=14 61 | type=VARCHAR(10) 62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 63 | 64 | [cdsEndStat] 65 | index=15 66 | type=VARCHAR(10) 67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 68 | -------------------------------------------------------------------------------- /resources/annotation/cytoBand-hg18_20111216.ann: 
-------------------------------------------------------------------------------- 1 | #Variant tools schema version 1.0 2 | # 3 | # $File: cytoBand.ann $ 4 | # 5 | # This file is part of variant_tools, a software application to annotate, 6 | # summarize, and filter variants for next-gen sequencing analysis. 7 | # Please visit http://variant_tools.sourceforge.net for details. 8 | # 9 | # Copyright (C) 2004 - 2010 Bo Peng (bpeng@mdanderson.org) 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program. If not, see http://www.gnu.org/licenses/. 23 | # 24 | # Please refer to http://varianttools.sourceforge.net/Annotation/New for 25 | # a description of the format of this file. 
26 | # 27 | # 28 | 29 | [linked fields] 30 | hg18=chr, begin, end 31 | 32 | [data sources] 33 | anno_type=range 34 | description=Cyto Band 35 | version=hg18_20111216 36 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/cytoBand.txt.gz 37 | direct_url=annoDB/cytoBand-hg18_20111216.DB.gz 38 | source_type=txt 39 | 40 | [chr] 41 | index=1 42 | adj=RemoveLeading('chr') 43 | type=chromosome 44 | 45 | [begin] 46 | index=2 47 | type=INTEGER 48 | adj=IncreaseBy(1) 49 | comment=start position on chromosome 50 | 51 | [end] 52 | index=3 53 | type=INTEGER 54 | comment=end position on chromosome 55 | 56 | [name] 57 | index=1,4 58 | adj=lambda x: (x[0][3:] + x[1]) if x[0].startswith('chr') else (x[0] + x[1]) 59 | type=VARCHAR(255) 60 | comment=name of cytogenetic band 61 | 62 | [gieStain] 63 | index=5 64 | type=VARCHAR(255) 65 | comment=giemsa stain results 66 | -------------------------------------------------------------------------------- /resources/annotation/keggPathway-20110823.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see http://www.gnu.org/licenses/ 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | # 7 | # To retrieve the kegg pathway details, a database query was run using the 8 | # "hg19" MySQL database from UCSC. We have a local copy of the database which can be 9 | # accessed with a mysql client. 
Or one can use the public MySQL server directly at UCSC: 10 | # 11 | # mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A 12 | # (for help connecting, see: http://genome.ucsc.edu/FAQ/FAQdownloads#download29) 13 | # 14 | # Kegg Pathway Query: This will return three columns 15 | # 16 | # select distinct ckm.ccdsId ccds_id, 17 | # kmd.mapID kegg_pathway_id, 18 | # kmd.description kegg_pathway_description 19 | # from hg19.ccdsKgMap ckm 20 | # join hg19.keggPathway kp on ckm.geneId=kp.kgID 21 | # join hg19.keggMapDesc kmd on kp.mapId=kmd.mapId 22 | # into outfile '/tmp/keggPathway.txt' 23 | # fields terminated by '\t' lines terminated by '\n' 24 | # 25 | 26 | [linked fields] 27 | *=ccdsId 28 | 29 | [data sources] 30 | description=kegg pathway for CCDS genes 31 | version=20110823 32 | anno_type=field 33 | direct_url=annoDB/keggPathway-20110823.DB.gz c97d10fa656535c710280f46b37c95a1 34 | source_url=annoDB/keggPathway-20110823.txt.gz 35 | source_type=txt 36 | 37 | [ccdsId] 38 | index=1 39 | type=VARCHAR(24) NULL 40 | comment=CCDS gene ID 41 | 42 | [KgID] 43 | index=2 44 | type=VARCHAR(24) NULL 45 | comment=Kegg pathway ID 46 | 47 | [KgDesc] 48 | index=3 49 | type=VARCHAR(128) NULL 50 | comment=Description of pathway 51 | -------------------------------------------------------------------------------- /resources/annotation/knownGene-hg18_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg18=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=UCSC Known Genes 13 | version=hg18_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene-hg18_20110909.DB.gz 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | 23 | [strand] 24 | index=3 25 | type=CHAR(1) NULL 26 | comment=which DNA strand contains the observed alleles 27 | 28 | [txStart] 29 | index=4 30 | type=INTEGER 31 | adj=IncreaseBy(1) 32 | comment=Transcription start position 33 | 34 | [txEnd] 35 | index=5 36 | type=INTEGER 37 | comment=Transcription end position 38 | 39 | [cdsStart] 40 | index=6 41 | type=INTEGER 42 | adj=IncreaseBy(1) 43 | comment=Coding region start 44 | 45 | [cdsEnd] 46 | index=7 47 | type=INTEGER 48 | comment=Coding region end 49 | 50 | [exonCount] 51 | index=8 52 | type=INTEGER NULL 53 | comment=Number of exons 54 | -------------------------------------------------------------------------------- /resources/annotation/knownGene-hg18_20121219.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg18=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=UCSC Known Genes 13 | version=hg18_20121219 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene-hg18_20121219.DB.gz 16 | source_type=txt 17 | 18 | [name] 19 | index=1 20 | type=VARCHAR(48) 21 | comment=Name of gene such as uc001aaa.3 22 | 23 | [chr] 24 | index=2 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=3 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=4 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=5 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=6 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=7 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=8 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | -------------------------------------------------------------------------------- /resources/annotation/knownGene-hg19_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg19=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=UCSC Known Genes 13 | version=hg19_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene-hg19_20110909.DB.gz c1f4cef575aad2c07d3fdff648ab2bda 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | 23 | [strand] 24 | index=3 25 | type=CHAR(1) NULL 26 | comment=which DNA strand contains the observed alleles 27 | 28 | [txStart] 29 | index=4 30 | type=INTEGER 31 | adj=IncreaseBy(1) 32 | comment=Transcription start position 33 | 34 | [txEnd] 35 | index=5 36 | type=INTEGER 37 | comment=Transcription end position 38 | 39 | [cdsStart] 40 | index=6 41 | type=INTEGER 42 | adj=IncreaseBy(1) 43 | comment=Coding region start 44 | 45 | [cdsEnd] 46 | index=7 47 | type=INTEGER 48 | comment=Coding region end 49 | 50 | [exonCount] 51 | index=8 52 | type=INTEGER NULL 53 | comment=Number of exons 54 | -------------------------------------------------------------------------------- /resources/annotation/knownGene-hg19_20121219.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg19=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=UCSC Known Genes 13 | version=hg19_20121219 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene-hg19_20121219.DB.gz c07367da2392c5e0277e232c89f49c68 16 | source_type=txt 17 | 18 | [name] 19 | index=1 20 | type=VARCHAR(48) 21 | comment=Name of gene such as uc001aaa.3 22 | 23 | [chr] 24 | index=2 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=3 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=4 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=5 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=6 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=7 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=8 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | -------------------------------------------------------------------------------- /resources/annotation/knownGene-hg19_20130904.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=Gene predictions based on data from RefSeq, Genbank, CCDS and UniProt, from the UCSC KnownGene track. 
13 | version=hg19_20130904 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene-hg19_20130904.DB.gz 0dfff9b8e479bbae7f0d8cb4fb406a29 16 | source_type=txt 17 | 18 | [name] 19 | index=1 20 | type=VARCHAR(48) 21 | comment=Name of gene such as uc001aaa.3 22 | 23 | [chr] 24 | index=2 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=3 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=4 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=5 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=6 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=7 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=8 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | -------------------------------------------------------------------------------- /resources/annotation/knownGene-hg38_20160328.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg38=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=Gene predictions based on data from RefSeq, Genbank, CCDS and UniProt, from the UCSC KnownGene track. 
13 | version=hg38_20160328 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene-hg38_20160328.DB.gz 6f1d0d4b00139626f34198cef68eb84f 16 | source_type=txt 17 | 18 | [name] 19 | index=1 20 | type=VARCHAR(48) 21 | comment=Name of gene such as uc001aaa.3 22 | 23 | [chr] 24 | index=2 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=3 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=4 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=5 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=6 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=7 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=8 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | 60 | [exonStarts] 61 | index=9 62 | type=VARCHAR(255) 63 | comment=Exon start positions (or end positions for minus strand item) 64 | 65 | [exonEnds] 66 | index=10 67 | type=VARCHAR(255) 68 | comment=Exon end positions (or start positions for minus strand item) 69 | 70 | [proteinID] 71 | index=11 72 | type=VARCHAR(40) 73 | comment=UniProt display ID, UniProt accession, or RefSeq protein ID 74 | 75 | [alignID] 76 | index=12 77 | type=VARCHAR(255) 78 | comment=Unique identifier (GENCODE transcript ID for GENCODE Basic) 79 | -------------------------------------------------------------------------------- /resources/annotation/knownGene_exon-hg18_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg18=chr, exon_start, exon_end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=Exon locations of UCSC Known Genes 13 | version=hg18_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene_exon-hg18_20110909.DB.gz 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | 23 | [strand] 24 | index=3 25 | type=CHAR(1) NULL 26 | comment=which DNA strand contains the observed alleles 27 | 28 | [txStart] 29 | index=4 30 | type=INTEGER 31 | adj=IncreaseBy(1) 32 | comment=Transcription start position 33 | 34 | [txEnd] 35 | index=5 36 | type=INTEGER 37 | comment=Transcription end position 38 | 39 | [cdsStart] 40 | index=6 41 | type=INTEGER 42 | adj=IncreaseBy(1) 43 | comment=Coding region start 44 | 45 | [cdsEnd] 46 | index=7 47 | type=INTEGER 48 | comment=Coding region end 49 | 50 | [exonCount] 51 | index=8 52 | type=INTEGER NULL 53 | comment=Number of exons 54 | 55 | [exon_start] 56 | index=9 57 | adj=SplitField(','), IncreaseBy(1) 58 | type=INTEGER NOT NULL 59 | comment=exon start position 60 | 61 | [exon_end] 62 | index=10 63 | adj=SplitField(',') 64 | type=INTEGER NOT NULL 65 | comment=exon end position 66 | -------------------------------------------------------------------------------- /resources/annotation/knownGene_exon-hg19_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg19=chr, exon_start, exon_end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=UCSC Known Genes 13 | version=hg19_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz 15 | direct_url=annoDB/knownGene_exon-hg19_20110909.DB.gz d24b555a54746f715fa6bf83d7d2643f 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | 23 | [strand] 24 | index=3 25 | type=CHAR(1) NULL 26 | comment=which DNA strand contains the observed alleles 27 | 28 | [txStart] 29 | index=4 30 | type=INTEGER 31 | adj=IncreaseBy(1) 32 | comment=Transcription start position 33 | 34 | [txEnd] 35 | index=5 36 | type=INTEGER 37 | comment=Transcription end position 38 | 39 | [cdsStart] 40 | index=6 41 | type=INTEGER 42 | adj=IncreaseBy(1) 43 | comment=Coding region start 44 | 45 | [cdsEnd] 46 | index=7 47 | type=INTEGER 48 | comment=Coding region end 49 | 50 | [exonCount] 51 | index=8 52 | type=INTEGER NULL 53 | comment=Number of exons 54 | 55 | [exon_start] 56 | index=9 57 | adj=SplitField(','), IncreaseBy(1) 58 | type=INTEGER NOT NULL 59 | comment=exon start position 60 | 61 | [exon_end] 62 | index=10 63 | adj=SplitField(',') 64 | type=INTEGER NOT NULL 65 | comment=exon end position 66 | -------------------------------------------------------------------------------- /resources/annotation/knownGene_exon-hg19_20130904.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, exon_start, exon_end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=Gene predictions based on data from RefSeq, Genbank, CCDS and UniProt, from the UCSC KnownGene track. 
This 13 | database contains all exome regions of the UCSC known gene database. 14 | version=hg19_20130904 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz 16 | direct_url=annoDB/knownGene_exon-hg19_20130904.DB.gz b944475f2b889f6d545d39682ec5b066 17 | source_type=txt 18 | 19 | [chr] 20 | index=2 21 | adj=RemoveLeading('chr') 22 | type=VARCHAR(20) 23 | 24 | [strand] 25 | index=3 26 | type=CHAR(1) NULL 27 | comment=which DNA strand contains the observed alleles 28 | 29 | [txStart] 30 | index=4 31 | type=INTEGER 32 | adj=IncreaseBy(1) 33 | comment=Transcription start position 34 | 35 | [txEnd] 36 | index=5 37 | type=INTEGER 38 | comment=Transcription end position 39 | 40 | [cdsStart] 41 | index=6 42 | type=INTEGER 43 | adj=IncreaseBy(1) 44 | comment=Coding region start 45 | 46 | [cdsEnd] 47 | index=7 48 | type=INTEGER 49 | comment=Coding region end 50 | 51 | [exonCount] 52 | index=8 53 | type=INTEGER NULL 54 | comment=Number of exons 55 | 56 | [exon_start] 57 | index=9 58 | adj=SplitField(','), IncreaseBy(1) 59 | type=INTEGER NOT NULL 60 | comment=exon start position 61 | 62 | [exon_end] 63 | index=10 64 | adj=SplitField(',') 65 | type=INTEGER NOT NULL 66 | comment=exon end position 67 | -------------------------------------------------------------------------------- /resources/annotation/phastCons-hg19_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | # 7 | [linked fields] 8 | hg19=chr, start, end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=PhastCons Conservation Scores 13 | version=hg19_20110909 14 | direct_url=annoDB/phastCons-hg19_20110909.DB.gz 5e9c3b8434330a7bc8230bfa5bc10812 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/phastCons46way.txt.gz 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | 23 | [start] 24 | index=3 25 | type=INTEGER 26 | adj=IncreaseBy(1) 27 | comment=Start position in chromosome 28 | 29 | [end] 30 | index=4 31 | type=INTEGER 32 | comment=End position in chromosome 33 | 34 | [name] 35 | index=5 36 | type=VARCHAR(225) 37 | comment=Name of conserved region 38 | 39 | [count] 40 | index=7 41 | type=INTEGER 42 | comment=Number of values in this block 43 | 44 | [valid_count] 45 | index=12 46 | type=INTEGER 47 | comment=Number of valid values in this block 48 | 49 | [lower_limit] 50 | index=10 51 | type=DOUBLE 52 | comment=Lowest value in this block 53 | 54 | [data_range] 55 | index=11 56 | type=DOUBLE 57 | comment=Spread of values in this block. lower_limit + data_range = upper_limit 58 | 59 | [sum_data] 60 | index=13 61 | type=DOUBLE 62 | comment=Sum of values in this block (can be used for calculate average and stddev of conservation scores) 63 | 64 | [sum_squares] 65 | index=14 66 | type=DOUBLE 67 | comment=Sum of values squared in this block (can be used for calculating stddev of conservation scores) 68 | -------------------------------------------------------------------------------- /resources/annotation/phastCons-hg19_20130322.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | # 7 | [linked fields] 8 | hg19=chr, start, end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=PhastCons Conservation Scores 13 | version=hg19_20130322 14 | direct_url=annoDB/phastCons-hg19_20130322.DB.gz 0309a4eb2fdd291f977fe45434879a85 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/phastCons46way.txt.gz 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | 23 | [start] 24 | index=3 25 | type=INTEGER 26 | adj=IncreaseBy(1) 27 | comment=Start position in chromosome 28 | 29 | [end] 30 | index=4 31 | type=INTEGER 32 | comment=End position in chromosome 33 | 34 | [name] 35 | index=5 36 | type=VARCHAR(225) 37 | comment=Name of conserved region 38 | 39 | [total_count] 40 | index=7 41 | type=INTEGER 42 | comment=Number of values in this block 43 | 44 | [valid_count] 45 | index=12 46 | type=INTEGER 47 | comment=Number of valid values in this block 48 | 49 | [lower_limit] 50 | index=10 51 | type=DOUBLE 52 | comment=Lowest value in this block 53 | 54 | [data_range] 55 | index=11 56 | type=DOUBLE 57 | comment=Spread of values in this block. lower_limit + data_range = upper_limit 58 | 59 | [sum_data] 60 | index=13 61 | type=DOUBLE 62 | comment=Sum of values in this block (can be used for calculate average and stddev of conservation scores) 63 | 64 | [sum_squares] 65 | index=14 66 | type=DOUBLE 67 | comment=Sum of values squared in this block (can be used for calculating stddev of conservation scores) 68 | -------------------------------------------------------------------------------- /resources/annotation/phastCons-hg38_20150913.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | # 7 | [linked fields] 8 | hg38=chr, chr_start, chr_end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=PhastCons Conservation Scores 13 | version=hg38_20150913 14 | direct_url=annoDB/phastCons-hg38_20150913.DB.gz f1a21eefa6b0a330a43ea5dac2cf70d6 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/phastCons100way.txt.gz 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | comment=Reference sequence chromosome or scaffold 23 | 24 | [chr_start] 25 | index=3 26 | type=INTEGER 27 | adj=IncreaseBy(1) 28 | comment=Start position in chromosome 29 | 30 | [chr_end] 31 | index=4 32 | type=INTEGER 33 | comment=End position in chromosome 34 | 35 | [name] 36 | index=5 37 | type=VARCHAR(225) 38 | comment=Name of conserved region 39 | 40 | [total_count] 41 | index=7 42 | type=INTEGER 43 | comment=Number of values in this block 44 | 45 | [valid_count] 46 | index=12 47 | type=INTEGER 48 | comment=Number of valid values in this block 49 | 50 | [lower_limit] 51 | index=10 52 | type=DOUBLE 53 | comment=Lowest value in this block 54 | 55 | [data_range] 56 | index=11 57 | type=DOUBLE 58 | comment=Spread of values in this block. 
lower_limit + data_range = upper_limit 59 | 60 | [sum_data] 61 | index=13 62 | type=DOUBLE 63 | comment=Sum of values in this block (can be used for calculating the average and stddev of conservation scores) 64 | 65 | [sum_squares] 66 | index=14 67 | type=DOUBLE 68 | comment=Sum of values squared in this block (can be used for calculating stddev of conservation scores) 69 | 70 | [file] 71 | index=9 72 | type=VARCHAR(255) 73 | comment=path name to data file, one byte per value 74 | -------------------------------------------------------------------------------- /resources/annotation/phastConsElements-hg19_20130622.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | # 7 | [linked fields] 8 | hg19=chr, start, end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=PhastCons Conservation Scores 13 | version=hg19_20130622 14 | direct_url=annoDB/phastConsElements-hg19_20130622.DB.gz c212ac8711f0e5194777e7dc9a61b447 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/phastConsElements46way.txt.gz 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(20) 22 | 23 | [start] 24 | index=3 25 | type=INTEGER 26 | adj=IncreaseBy(1) 27 | comment=Start position in chromosome 28 | 29 | [end] 30 | index=4 31 | type=INTEGER 32 | comment=End position in chromosome 33 | 34 | [name] 35 | index=5 36 | type=VARCHAR(225) 37 | comment=Name of conserved region 38 | 39 | [score] 40 | index=6 41 | type=INTEGER 42 | comment=Phast cons score from 0 to 1000 43 | -------------------------------------------------------------------------------- /resources/annotation/phastConsElements-hg38_20150913.ann:
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | # 7 | [linked fields] 8 | hg38=chr, start, end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=PhastCons Conservation Scores 13 | version=hg38_20150913 14 | direct_url=annoDB/phastConsElements-hg38_20150913.DB.gz 5b9649e1ae18825622090a4e1f5e07db 15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/phastConsElements100way.txt.gz 16 | source_type=txt 17 | 18 | [chr] 19 | index=2 20 | adj=RemoveLeading('chr') 21 | type=VARCHAR(255) 22 | comment=Reference sequence chromosome or scaffold 23 | 24 | [start] 25 | index=3 26 | type=INTEGER 27 | adj=IncreaseBy(1) 28 | comment=Start position in chromosome 29 | 30 | [end] 31 | index=4 32 | type=INTEGER 33 | comment=End position in chromosome 34 | 35 | [name] 36 | index=5 37 | type=VARCHAR(225) 38 | comment=Name of conserved region 39 | 40 | [score] 41 | index=6 42 | type=INTEGER 43 | comment=Phast cons score from 0 to 1000 44 | -------------------------------------------------------------------------------- /resources/annotation/refGene-hg18_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 
6 | 7 | [linked fields] 8 | hg18=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=refseq Genes 13 | version=hg18_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz 15 | direct_url=annoDB/refGene-hg18_20110909.DB.gz 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=5 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=6 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=7 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=8 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=9 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | 60 | [score] 61 | index=12 62 | type=INTEGER NULL 63 | comment=Score 64 | 65 | [name2] 66 | index=13 67 | type=VARCHAR(255) 68 | comment=Alternative name 69 | 70 | [cdsStartStat] 71 | index=14 72 | type=VARCHAR(10) 73 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 74 | 75 | [cdsEndStat] 76 | index=15 77 | type=VARCHAR(10) 78 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 79 | -------------------------------------------------------------------------------- /resources/annotation/refGene-hg19_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file.
6 | 7 | [linked fields] 8 | hg19=chr, txStart, txEnd 9 | 10 | [data sources] 11 | anno_type=range 12 | description=refseq Genes 13 | version=hg19_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz 15 | direct_url=annoDB/refGene-hg19_20110909.DB.gz 03443479f56812cea4b2d42aebbc1151 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=5 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=6 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=7 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=8 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=9 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | 60 | [score] 61 | index=12 62 | type=INTEGER NULL 63 | comment=Score 64 | 65 | [name2] 66 | index=13 67 | type=VARCHAR(255) 68 | comment=Alternative name 69 | 70 | [cdsStartStat] 71 | index=14 72 | type=VARCHAR(10) 73 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 74 | 75 | [cdsEndStat] 76 | index=15 77 | type=VARCHAR(10) 78 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 79 | -------------------------------------------------------------------------------- /resources/annotation/refGene_exon-hg18_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file.
6 | 7 | [linked fields] 8 | hg18=chr, exon_start, exon_end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=Exon locations of refseq Genes 13 | version=hg18_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz 15 | direct_url=annoDB/refGene_exon-hg18_20110909.DB.gz 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=5 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=6 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=7 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=8 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=9 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | 60 | [exon_start] 61 | index=10 62 | adj=SplitField(','), IncreaseBy(1) 63 | type=INTEGER NOT NULL 64 | comment=exon start position 65 | 66 | [exon_end] 67 | index=11 68 | adj=SplitField(',') 69 | type=INTEGER NOT NULL 70 | comment=exon end position 71 | 72 | [score] 73 | index=12 74 | type=INTEGER NULL 75 | comment=Score 76 | 77 | [name2] 78 | index=13 79 | type=VARCHAR(255) 80 | comment=Alternative name 81 | 82 | [cdsStartStat] 83 | index=14 84 | type=VARCHAR(10) 85 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 86 | 87 | [cdsEndStat] 88 | index=15 89 | type=VARCHAR(10) 90 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 91 | -------------------------------------------------------------------------------- /resources/annotation/refGene_exon-hg19_20110909.ann: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011
- 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New 5 | # for a description of the format of this file. 6 | 7 | [linked fields] 8 | hg19=chr, exon_start, exon_end 9 | 10 | [data sources] 11 | anno_type=range 12 | description=Exon locations of refseq Genes 13 | version=hg19_20110909 14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz 15 | direct_url=annoDB/refGene_exon-hg19_20110909.DB.gz 0cde491895e87928560511b9d7ef91d1 16 | source_type=txt 17 | 18 | [name] 19 | index=2 20 | type=VARCHAR(255) 21 | comment=Gene name 22 | 23 | [chr] 24 | index=3 25 | adj=RemoveLeading('chr') 26 | type=VARCHAR(20) 27 | 28 | [strand] 29 | index=4 30 | type=CHAR(1) NULL 31 | comment=which DNA strand contains the observed alleles 32 | 33 | [txStart] 34 | index=5 35 | type=INTEGER 36 | adj=IncreaseBy(1) 37 | comment=Transcription start position 38 | 39 | [txEnd] 40 | index=6 41 | type=INTEGER 42 | comment=Transcription end position 43 | 44 | [cdsStart] 45 | index=7 46 | type=INTEGER 47 | adj=IncreaseBy(1) 48 | comment=Coding region start 49 | 50 | [cdsEnd] 51 | index=8 52 | type=INTEGER 53 | comment=Coding region end 54 | 55 | [exonCount] 56 | index=9 57 | type=INTEGER NULL 58 | comment=Number of exons 59 | 60 | [exon_start] 61 | index=10 62 | adj=SplitField(','), IncreaseBy(1) 63 | type=INTEGER NOT NULL 64 | comment=exon start position 65 | 66 | [exon_end] 67 | index=11 68 | adj=SplitField(',') 69 | type=INTEGER NOT NULL 70 | comment=exon end position 71 | 72 | [score] 73 | index=12 74 | type=INTEGER NULL 75 | comment=Score 76 | 77 | [name2] 78 | index=13 79 | type=VARCHAR(255) 80 | comment=Alternative name 81 | 82 | [cdsStartStat] 83 | index=14 84 | type=VARCHAR(10) 85 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl' 86 | 87 | [cdsEndStat] 88 | index=15 89 | type=VARCHAR(10) 90 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and
'cmpl' 91 | -------------------------------------------------------------------------------- /resources/format/ANNOVAR.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | # 7 | 8 | [format description] 9 | description=Input format of ANNOVAR. No genotype is defined. 10 | variant=chr,pos,ref,alt 11 | delimiter=None 12 | export_by=chr,pos,ref,alt 13 | 14 | [DEFAULT] 15 | # one or more fields that will be outputted in the comment field, if specified. 16 | comment_string= 17 | comment_string_comment=Output one or more fields to the optional comment column of this format. 18 | 19 | [chr] 20 | index=1 21 | type=VARCHAR(20) 22 | adj=RemoveLeading('chr') 23 | comment=Chromosome 24 | 25 | [pos] 26 | index=2 27 | type=INTEGER NOT NULL 28 | comment=1-based position 29 | 30 | [ref] 31 | index=4 32 | type=VARCHAR(255) 33 | comment=Reference allele, '-' for insertion. 34 | 35 | [alt] 36 | index=5 37 | type=VARCHAR(255) 38 | comment=Alternative allele, '-' for deletion.
39 | 40 | [col_1] 41 | field=chr 42 | comment=chromosome 43 | 44 | [col_2] 45 | field=pos 46 | comment=position (1-based) 47 | 48 | [col_3] 49 | field=pos,ref 50 | adj=lambda x: str(int(x[0])-1+len(x[1])) 51 | comment=end position 52 | 53 | [col_4] 54 | field=ref 55 | comment=reference allele 56 | 57 | [col_5] 58 | field=alt 59 | comment=alternative allele 60 | 61 | [col_6] 62 | field=%(comment_string)s 63 | adj=JoinFields(',') 64 | comment=optional column 65 | -------------------------------------------------------------------------------- /resources/format/ANNOVAR_exonic_variant_function.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | # 7 | 8 | [format description] 9 | description=Output from ANNOVAR for files of type *exonic_variant_function, generated from command "path/to/annovar/annotate_variation.pl annovar.txt path/to/annovar/humandb/". This format imports chr, pos, ref, alt and ANNOVAR annotations. For details please refer to http://www.openbioinformatics.org/annovar/annovar_gene.html 10 | variant=chr, pos, ref, alt 11 | variant_info=%(var_info)s 12 | 13 | [DEFAULT] 14 | var_info=mut_type 15 | var_info_comment=Fields to be outputted, can be one or both of mut_type and function. 16 | 17 | [chr] 18 | index=4 19 | type=VARCHAR(20) 20 | adj=RemoveLeading('chr') 21 | comment=Chromosome 22 | 23 | [pos] 24 | index=5 25 | type=INTEGER NOT NULL 26 | comment=1-based position, hg18 27 | 28 | [ref] 29 | index=7 30 | type=VARCHAR(255) 31 | comment=Reference allele, '-' for insertion. 32 | 33 | [alt] 34 | index=8 35 | type=VARCHAR(255) 36 | comment=Alternative allele, '-' for deletion. 37 | 38 | [mut_type] 39 | index=2 40 | type=VARCHAR(255) 41 | comment=the functional consequences of the variant. 
42 | 43 | [genename] 44 | index=3 45 | type=VARCHAR(255) 46 | adj=ExtractField(1, sep=':') 47 | comment=Gene name (for the first exon if the variant is in more than one exons, but usually the names for all exons are the same). 48 | 49 | [function] 50 | index=3 51 | type=VARCHAR(255) 52 | comment=the gene name, the transcript identifier and the sequence change in the corresponding transcript 53 | -------------------------------------------------------------------------------- /resources/format/ANNOVAR_variant_function.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | # 7 | 8 | [format description] 9 | description=Output from ANNOVAR for files of type "*.variant_function", generated from command "path/to/annovar/annotate_variation.pl annovar.txt path/to/annovar/humandb/". This format imports chr, pos, ref, alt and ANNOVAR annotations. For details please refer to http://www.openbioinformatics.org/annovar/annovar_gene.html 10 | variant=chr, pos, ref, alt 11 | variant_info=%(var_info)s 12 | 13 | [DEFAULT] 14 | var_info=region_type 15 | var_info_comment=Fields to be outputted, can be one or both of region_type and region_name. 16 | 17 | [chr] 18 | index=3 19 | type=VARCHAR(20) 20 | adj=RemoveLeading('chr') 21 | comment=Chromosome 22 | 23 | [pos] 24 | index=4 25 | type=INTEGER NOT NULL 26 | comment=1-based position, hg18 27 | 28 | [ref] 29 | index=6 30 | type=VARCHAR(255) 31 | comment=Reference allele, '-' for insertion. 32 | 33 | [alt] 34 | index=7 35 | type=VARCHAR(255) 36 | comment=Alternative allele, '-' for deletion. 37 | 38 | [region_type] 39 | index=1 40 | type=VARCHAR(255) 41 | comment=The genomic region type (i.e., intergenic, ncRNA_intronic, etc) where this variant lies. 
42 | 43 | [region_name] 44 | index=2 45 | type=VARCHAR(255) 46 | comment=Genomic region name that corresponds to the region_type. If the variant lies in an intergenic region, this field will specify the closest known regions upstream and downstream of this variant. 47 | -------------------------------------------------------------------------------- /resources/format/csv.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [format description] 8 | description=Import variants (chr, pos, ref, alt) in csv format, or output arbitrary specified fields in csv format 9 | delimiter=',' 10 | variant=chr,pos,ref,alt 11 | export_by=chr,pos,ref,alt 12 | sort_output_by=%(order_by)s 13 | 14 | [DEFAULT] 15 | chr_col=1 16 | chr_col_comment=Column index for the chromosome field 17 | 18 | pos_col=2 19 | pos_col_comment=Column index for the position field 20 | 21 | ref_col=3 22 | ref_col_comment=Column index for the reference field 23 | 24 | alt_col=4 25 | alt_col_comment=Column index for the alternative field 26 | 27 | pos_adj=0 28 | pos_adj_comment=Set to 1 if the input position is zero-based. 29 | 30 | fields=chr,pos,ref,alt 31 | fields_comment=Fields to output, simple arithmetics are allowed (e.g. pos+1) but aggregation functions are not supported. 32 | 33 | order_by= 34 | order_by_comment=Fields used to order output in ascending order. 
35 | 36 | [field formatter] 37 | fmt_*=CSVFormatter() 38 | fmt_GT=GenoFormatter(style='numeric') 39 | 40 | [col_1] 41 | field=%(fields)s 42 | adj=JoinRecords(',') 43 | comment=Output all fields as one column 44 | 45 | [col_2] 46 | field=GT 47 | comment=genotype 48 | 49 | [chr] 50 | index=%(chr_col)s 51 | type=VARCHAR(20) 52 | adj=RemoveLeading('chr') 53 | comment=Chromosome 54 | 55 | [pos] 56 | index=%(pos_col)s 57 | adj=IncreaseBy(%(pos_adj)s) 58 | type=INTEGER NOT NULL 59 | comment=1-based position 60 | 61 | [ref] 62 | index=%(ref_col)s 63 | type=VARCHAR(255) 64 | comment=Reference allele, '-' for insertion. 65 | 66 | [alt] 67 | index=%(alt_col)s 68 | type=VARCHAR(255) 69 | comment=Alternative allele, '-' for deletion. 70 | -------------------------------------------------------------------------------- /resources/format/pileup_indel.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | # 7 | 8 | [format description] 9 | description=Input format for samtools pileup indel caller. This format imports chr, pos, ref, alt and genotype. 10 | variant=chr, pos, ref, alt 11 | genotype=GT 12 | 13 | [chr] 14 | index=1 15 | type=VARCHAR(20) 16 | adj=RemoveLeading('chr') 17 | comment=Chromosome name 18 | 19 | [pos] 20 | index=2 21 | type=INTEGER NOT NULL 22 | comment=Start position of the indel event. 23 | 24 | [type] 25 | index=3 26 | type=VARCHAR(255) 27 | comment=String summarizing the indel type, one of Dn (deletion of length n) and In (insertion of length n) 28 | 29 | [ref] 30 | index=3,4 31 | type=VARCHAR(255) 32 | # send in I4, AAAA or D1, A. 
If this is an insertion, ref is -, otherwise ref is the genotype 33 | adj=lambda x: '-' if x[0].startswith('I') else x[1] 34 | comment=reference allele, '-' for insertion 35 | 36 | [alt] 37 | index=3,4 38 | type=VARCHAR(255) 39 | # send in I4, AAAA or D1, A. If this is an insertion, alt is genotype, otherwise alt is '-' 40 | adj=lambda x: '-' if x[0].startswith('D') else x[1] 41 | comment=alternative allele, '-' for deletion 42 | 43 | [GT] 44 | index=6 45 | type=INT 46 | adj=MapValue({'homo': '2', 'hete': '1'}) 47 | comment=type of indel (homozygote or heterozygote) 48 | -------------------------------------------------------------------------------- /resources/format/plink.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [format description] 8 | description=Input format for PLINK dataset. 
Currently only PLINK binary PED file format is supported (*.bed, *.bim & *.fam) 9 | delimiter=',' 10 | variant=chr,pos,ref,alt 11 | genotype=GT 12 | # call a preprocessor to convert input PLINK files to a variant-based format 13 | preprocessor=PlinkConverter($build, chrom_namemap = {'23':'X', '24':'Y', '26':'M'}) 14 | 15 | [chr] 16 | index=1 17 | type=VARCHAR(20) 18 | comment=Chromosome 19 | 20 | [pos] 21 | index=2 22 | type=INTEGER NOT NULL 23 | comment=1-based Position of the snp 24 | 25 | [ref] 26 | index=3 27 | type=VARCHAR(255) 28 | comment=Reference allele 29 | 30 | [alt] 31 | index=4 32 | type=VARCHAR(255) 33 | comment=Alternative allele 34 | 35 | [GT] 36 | index=5: 37 | type=INTEGER 38 | adj=Nullify(['3', 'E']) 39 | comment=Genotype coded as 0 (ref ref), 1 (ref alt) and 2 (alt alt) 40 | -------------------------------------------------------------------------------- /resources/format/rsname.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file.
6 | 7 | [format description] 8 | description=Import variants (chr, pos, ref, alt) that are queried from dbSNP database using provided rsnames 9 | delimiter=',' 10 | variant=chr,pos,ref,alt 11 | 12 | [DEFAULT] 13 | sep=',' 14 | sep_comment=delimiter used to separate input fields 15 | 16 | rsname_col=1 17 | rsname_col_comment=Index for the column with rsname 18 | 19 | dbfile=dbSNP-hg19_138.DB 20 | dbfile_comment=Name of an attached dbSNP database or path to the dbSNP database in sqlite format 21 | 22 | [chr] 23 | index=%(rsname_col)s 24 | type=VARCHAR(20) 25 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='chr', cond_fields='name') 26 | comment=Obtain chromosome from dbSNP by rsname 27 | 28 | [pos] 29 | index=%(rsname_col)s 30 | type=VARCHAR(20) 31 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='pos', cond_fields='name') 32 | comment=Obtain position from dbSNP by rsname 33 | 34 | [ref] 35 | index=%(rsname_col)s 36 | type=VARCHAR(20) 37 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='ref', cond_fields='name') 38 | comment=Obtain reference allele from dbSNP by rsname 39 | 40 | [alt] 41 | index=%(rsname_col)s 42 | type=VARCHAR(20) 43 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='alt', cond_fields='name') 44 | comment=Obtain alternative allele from dbSNP by rsname 45 | -------------------------------------------------------------------------------- /resources/format/twoalleles.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 
6 | 7 | [format description] 8 | description=Import variants (chr, pos, ref, alt) from chr, pos, allele1, and allele2, using a reference genome to determine which one is reference 9 | delimiter=%(sep)s 10 | variant=chr,pos,ref,alt 11 | 12 | [DEFAULT] 13 | sep=',' 14 | sep_comment=delimiter used to separate input fields 15 | 16 | ref_genome=hg19 17 | ref_genome_comment=Reference genome of the input data 18 | 19 | chr_col=1 20 | chr_col_comment=Column index for the chromosome field 21 | 22 | pos_col=2 23 | pos_col_comment=Column index for the position field 24 | 25 | a1_col=3 26 | a1_col_comment=Column index for the first allele (allele1) field 27 | 28 | a2_col=4 29 | a2_col_comment=Column index for the second allele (allele2) field 30 | 31 | [chr] 32 | index=%(chr_col)s 33 | type=VARCHAR(20) 34 | adj=RemoveLeading('chr') 35 | comment=Chromosome 36 | 37 | [pos] 38 | index=%(pos_col)s 39 | type=INTEGER NOT NULL 40 | adj=lambda x: x.split(':')[1] 41 | comment=1-based position 42 | 43 | [ref] 44 | index=%(chr_col)s, %(pos_col)s 45 | type=VARCHAR(20) 46 | adj=RefAtPos(build="%(ref_genome)s") 47 | comment=Obtain reference allele from reference genome 48 | 49 | [alt] 50 | index=%(chr_col)s, %(pos_col)s, %(a1_col)s, %(a2_col)s 51 | type=VARCHAR(20) 52 | adj=AltAtPos(build="%(ref_genome)s") 53 | comment=Obtain alternative allele by comparing the two input alleles with the reference genome 54 | -------------------------------------------------------------------------------- /src/cgatools/reference/ChromosomeIdField.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Complete Genomics, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you 4 | // may not use this file except in compliance with the License.
You 5 | // may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 12 | // implied. See the License for the specific language governing 13 | // permissions and limitations under the License. 14 | 15 | #include "cgatools/core.hpp" 16 | #include "cgatools/reference/ChromosomeIdField.hpp" 17 | #include "cgatools/reference/CrrFile.hpp" 18 | 19 | namespace cgatools { namespace reference { 20 | 21 | void ChromosomeIdField::parse(const char* first, const char* last) 22 | { 23 | buf_.assign(first, last); 24 | *id_ = ref_.getChromosomeId(buf_); 25 | } 26 | 27 | } } // cgatools::reference 28 | -------------------------------------------------------------------------------- /src/cgatools/reference/ChromosomeIdField.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Complete Genomics, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you 4 | // may not use this file except in compliance with the License. You 5 | // may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 12 | // implied. See the License for the specific language governing 13 | // permissions and limitations under the License. 14 | 15 | #ifndef CGATOOLS_REFERENCE_CHROMOSOMEIDFIELD_HPP_ 16 | #define CGATOOLS_REFERENCE_CHROMOSOMEIDFIELD_HPP_ 1 17 | 18 | //! 
@file ChromosomeIdField.hpp 19 | 20 | #include "cgatools/core.hpp" 21 | #include "cgatools/util/DelimitedLineParser.hpp" 22 | 23 | namespace cgatools { namespace reference { 24 | 25 | class CrrFile; 26 | 27 | class ChromosomeIdField : public util::DelimitedFieldParser 28 | { 29 | public: 30 | ChromosomeIdField(const std::string& name, uint16_t* id, const CrrFile& ref) 31 | : DelimitedFieldParser(name), 32 | ref_(ref), 33 | id_(id) 34 | { 35 | } 36 | 37 | void parse(const char* first, const char* last); 38 | 39 | private: 40 | const CrrFile& ref_; 41 | uint16_t* id_; 42 | std::string buf_; 43 | }; 44 | 45 | } } // cgatools::reference 46 | 47 | #endif // CGATOOLS_REFERENCE_CHROMOSOMEIDFIELD_HPP_ 48 | -------------------------------------------------------------------------------- /src/cgatools/reference/RepeatMaskerStore.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Complete Genomics, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you 4 | // may not use this file except in compliance with the License. You 5 | // may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 12 | // implied. See the License for the specific language governing 13 | // permissions and limitations under the License. 14 | 15 | #ifndef CGATOOLS_REFERENCE_REPEATMASKERSTORE_HPP_ 16 | #define CGATOOLS_REFERENCE_REPEATMASKERSTORE_HPP_ 1 17 | 18 | //! 
@file RepeatMaskerStore.hpp 19 | 20 | #include "cgatools/core.hpp" 21 | #include "cgatools/reference/RangeAnnotationStore.hpp" 22 | 23 | namespace cgatools { namespace reference { 24 | 25 | struct RepeatMaskerAnnotation 26 | { 27 | std::string name_, family_; 28 | double divergence_; 29 | bool strand_; 30 | }; 31 | 32 | class RepeatMaskerStore : 33 | public RangeAnnotationStore 34 | { 35 | public: 36 | 37 | RepeatMaskerStore(const reference::CrrFile& crr, const std::string& fn) 38 | : Base(crr) 39 | { 40 | load(fn); 41 | } 42 | 43 | void bindColumns(util::DelimitedFile& df, 44 | reference::Range& range, 45 | RepeatMaskerAnnotation& data) 46 | { 47 | using namespace util; 48 | bindRangeColumns(df, range); 49 | df.addField(StringField("repName", &data.name_)); 50 | df.addField(StringField("repFamily", &data.family_)); 51 | df.addField(ValueField("divergence", &data.divergence_)); 52 | df.addField(StrandField("strand", &data.strand_)); 53 | } 54 | }; 55 | 56 | }} 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /src/cgatools/util/Exception.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Complete Genomics, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you 4 | // may not use this file except in compliance with the License. You 5 | // may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 12 | // implied. See the License for the specific language governing 13 | // permissions and limitations under the License. 
14 | 15 | #include "cgatools/core.hpp" 16 | #include "cgatools/util/Exception.hpp" 17 | 18 | namespace cgatools { namespace util { 19 | 20 | Exception::Exception(const std::string& message) 21 | : message_(message) 22 | { 23 | } 24 | 25 | const char* Exception::what() const throw () 26 | { 27 | return message_.c_str(); 28 | } 29 | 30 | } } // cgatools::util 31 | -------------------------------------------------------------------------------- /src/cgatools/util/GenericHistogram.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Complete Genomics, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you 4 | // may not use this file except in compliance with the License. You 5 | // may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 12 | // implied. See the License for the specific language governing 13 | // permissions and limitations under the License. 
14 | 15 | #include "cgatools/core.hpp" 16 | #include "GenericHistogram.hpp" 17 | #include 18 | 19 | namespace cgatools { namespace util { 20 | 21 | std::ostream& operator<<(std::ostream& out, const SimpleHistogram& src) { 22 | src.write(out); 23 | return out; 24 | } 25 | void SimpleHistogram::write( std::ostream& out ) const 26 | { 27 | out << "#sum," << sum_ << ",overall," << number_ << std::endl; 28 | out << std::endl; 29 | out << ">bucket,count" << std::endl; 30 | for (size_t ii = 0; ii < count_.size()-1; ++ii) { 31 | out << ii << ',' << count_[ii] << std::endl; 32 | } 33 | out << "over," << count_.back() << std::endl; 34 | } 35 | } } 36 | -------------------------------------------------------------------------------- /src/cgatools/util/IndirectComparator.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Complete Genomics, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you 4 | // may not use this file except in compliance with the License. You 5 | // may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 12 | // implied. See the License for the specific language governing 13 | // permissions and limitations under the License. 14 | 15 | #ifndef CGATOOLS_UTIL_INDIRECTCOMPARATOR_HPP_ 16 | #define CGATOOLS_UTIL_INDIRECTCOMPARATOR_HPP_ 1 17 | 18 | //! 
@file IndirectComparator.hpp 19 | 20 | #include "cgatools/core.hpp" 21 | 22 | namespace cgatools { namespace util { 23 | 24 | template 25 | class IndirectComparator 26 | { 27 | public: 28 | IndirectComparator(const Container& cc) 29 | : cc_(cc) 30 | { 31 | } 32 | 33 | template 34 | bool operator()(const Index& lhs, const Index& rhs) const 35 | { 36 | return cc_[lhs] < cc_[rhs]; 37 | } 38 | 39 | private: 40 | const Container& cc_; 41 | }; 42 | 43 | } } // cgatools::util 44 | 45 | #endif // CGATOOLS_UTIL_INDIRECTCOMPARATOR_HPP_ 46 | -------------------------------------------------------------------------------- /src/hdf5-blosc/blosc_filter.h: -------------------------------------------------------------------------------- 1 | #ifndef FILTER_BLOSC_H 2 | #define FILTER_BLOSC_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "blosc.h" 9 | 10 | /* Filter revision number, starting at 1 */ 11 | /* #define FILTER_BLOSC_VERSION 1 */ 12 | #define FILTER_BLOSC_VERSION 2 /* multiple compressors since Blosc 1.3 */ 13 | 14 | /* Filter ID registered with the HDF Group */ 15 | #define FILTER_BLOSC 32001 16 | 17 | /* Registers the filter with the HDF5 library. */ 18 | #if defined(_MSC_VER) 19 | __declspec(dllexport) 20 | #endif /* defined(_MSC_VER) */ 21 | int register_blosc(char **version, char **date); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /src/hdf5-blosc/blosc_plugin.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Dynamically loaded filter plugin for HDF5 blosc filter. 
3 | * 4 | * Author: Kiyoshi Masui 5 | * Created: 2014 6 | * 7 | * For compiling, use: 8 | * $ h5cc -fPIC -shared blosc_plugin.c blosc_filter.c -o libH5Zblosc.so -lblosc 9 | * 10 | */ 11 | 12 | 13 | #include 14 | 15 | 16 | #define H5Z_class_t_vers 2 17 | 18 | #include "blosc_plugin.h" 19 | #include "blosc_filter.h" 20 | 21 | 22 | /* Prototypes for filter function in blosc_filter.c. */ 23 | size_t blosc_filter(unsigned flags, size_t cd_nelmts, 24 | const unsigned cd_values[], size_t nbytes, 25 | size_t* buf_size, void** buf); 26 | 27 | herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space); 28 | 29 | 30 | H5Z_class_t blosc_H5Filter[1] = { 31 | { 32 | H5Z_CLASS_T_VERS, 33 | (H5Z_filter_t)(FILTER_BLOSC), 34 | 1, /* encoder_present flag (set to true) */ 35 | 1, /* decoder_present flag (set to true) */ 36 | "blosc", 37 | /* Filter info */ 38 | NULL, /* The "can apply" callback */ 39 | (H5Z_set_local_func_t)(blosc_set_local), /* The "set local" callback */ 40 | (H5Z_func_t)(blosc_filter), /* The filter function */ 41 | } 42 | }; 43 | 44 | 45 | H5PL_type_t H5PLget_plugin_type(void) { return H5PL_TYPE_FILTER; } 46 | 47 | 48 | const void* H5PLget_plugin_info(void) { return blosc_H5Filter; } 49 | -------------------------------------------------------------------------------- /src/hdf5-blosc/blosc_plugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Dynamically loaded filter plugin for HDF5 blosc filter. 3 | * 4 | * Author: Kiyoshi Masui 5 | * Created: 2014 6 | * 7 | * 8 | * Header file 9 | * ----------- 10 | * 11 | * This provides dynamically loaded HDF5 filter functionality (introduced 12 | * in HDF5-1.8.11, May 2013) to the blosc HDF5 filter. 13 | * 14 | * Usage: compile as a shared library and install either to the default 15 | * search location for HDF5 filter plugins (on Linux 16 | * /usr/local/hdf5/lib/plugin) or to a location pointed to by the 17 | * HDF5_PLUGIN_PATH environment variable. 
18 | * 19 | */ 20 | 21 | 22 | #ifndef PLUGIN_BLOSC_H 23 | #define PLUGIN_BLOSC_H 24 | 25 | #include "H5PLextern.h" 26 | 27 | 28 | H5PL_type_t H5PLget_plugin_type(void); 29 | 30 | 31 | const void* H5PLget_plugin_info(void); 32 | 33 | 34 | #endif // PLUGIN_BLOSC_H 35 | 36 | 37 | -------------------------------------------------------------------------------- /src/libplinkio/LICENSE: -------------------------------------------------------------------------------- 1 | /* ===================================================================================== 2 | // 3 | // This is a small C and Python library for reading Plink genotype files, 4 | // written by Mattias Franberg, version 0.2.2 5 | // 6 | // https://bitbucket.org/mattias_franberg/libplinkio 7 | // 8 | // This software is not licensed or copyrighted. The varianttools developers 9 | // have been contacting its author and will include the license information when we 10 | // hear from the author, or replace it with alternative implementation if the author 11 | // requests for a removal. 12 | // 13 | ===================================================================================== */ 14 | -------------------------------------------------------------------------------- /src/libplinkio/bim_parse.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2012-2013, Mattias Frånberg 3 | * All rights reserved. 4 | * 5 | * This file is distributed under the Modified BSD License. See the COPYING file 6 | * for details. 7 | */ 8 | 9 | #ifndef __BIM_PARSE_H__ 10 | #define __BIM_PARSE_H__ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | #include 17 | 18 | /** 19 | * Parses the loci and points the given locus array to 20 | * the memory that contains them, and writes back the number 21 | * of loci. 22 | * 23 | * @param bim_fp Bim file. 24 | * @param locus The parsed loci will be stored here. 
25 | * 26 | * @return PIO_OK if the loci could be parsed, PIO_ERROR otherwise. 27 | */ 28 | pio_status_t parse_loci(FILE *bim_fp, UT_array *locus); 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | #endif /* End of __BIM_PARSE_H__ */ 35 | -------------------------------------------------------------------------------- /src/libplinkio/common.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMMON_H_ 2 | #define _COMMON_H_ 3 | 4 | #include 5 | 6 | /** 7 | * Common integer conversion for python 3 and 2.x. 8 | */ 9 | #if PY_MAJOR_VERSION < 3 10 | #define PyLong_FromLong(x) ( (PyObject *) PyInt_FromLong( (long) ( x ) ) ) 11 | #define PyLong_AsLong(x) ( (long) PyInt_AsLong( ( x ) ) ) 12 | #endif 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /src/libplinkio/fam_parse.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2012-2013, Mattias Frånberg 3 | * All rights reserved. 4 | * 5 | * This file is distributed under the Modified BSD License. See the COPYING file 6 | * for details. 7 | */ 8 | 9 | #ifndef __FAM_PARSE_H__ 10 | #define __FAM_PARSE_H__ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | /** 22 | * Parses the samples and points the given sample array to 23 | * the memory that contains them, and writes back the number 24 | * of samples. 25 | * 26 | * @param fam_fp Fam file. 27 | * @param sample Parsed samples will be stored here. 28 | * 29 | * @return PIO_OK if the samples could be parsed, PIO_ERROR otherwise. 
30 | */ 31 | pio_status_t parse_samples(FILE *fam_fp, UT_array *sample); 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | 37 | #endif /* End of __FAM_PARSE_H__ */ 38 | -------------------------------------------------------------------------------- /src/libplinkio/file.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2012-2013, Mattias Frånberg 3 | * All rights reserved. 4 | * 5 | * This file is distributed under the Modified BSD License. See the COPYING file 6 | * for details. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | /** 16 | * Total size for a buffer containing the copy command 17 | * that will be issued to the shell. 18 | * NOTE(review): this macro is not referenced anywhere in this file; file_copy sizes its buffer from the path lengths instead. */ 19 | #define FILE_COPY_BUFFER_SIZE 4096 20 | /* file_copy: builds a cp command from the two paths and runs it through system(). The paths are pasted into the command unquoted, so whitespace or shell metacharacters in either path change what the shell executes. Returns FILE_OK unless system() returns -1. */ 21 | file_status_t 22 | file_copy(const char *from_path, const char *to_path) 23 | { 24 | char *copy_command = malloc( sizeof( char ) * ( strlen( from_path ) + strlen( to_path ) + 5 ) ); 25 | /* The 5 extra bytes cover the three characters of the cp prefix, the space between the two paths, and the terminating NUL. NOTE(review): the malloc result is used without a NULL check. */ 26 | sprintf( copy_command, "cp %s %s", from_path, to_path ); 27 | int status = system( copy_command ); 28 | free( copy_command ); 29 | /* NOTE(review): only a return of -1 from system() is treated as failure, so a cp that ran but exited with an error is still reported as FILE_OK -- confirm whether that is intended. */ 30 | if( status != -1 ) 31 | { 32 | return FILE_OK; 33 | } 34 | else 35 | { 36 | return FILE_ERROR; 37 | } 38 | 39 | } 40 | /* file_remove: builds an rm command from the path and runs it through system(); the path is pasted into the command unquoted. Returns FILE_OK unless system() returns -1. */ 41 | file_status_t 42 | file_remove(const char *path) 43 | { 44 | char *rm_command = malloc( sizeof( char ) * ( strlen( path ) + 4 ) ); 45 | /* The 4 extra bytes cover the three characters of the rm prefix and the terminating NUL. NOTE(review): the malloc result is used without a NULL check. */ 46 | sprintf( rm_command, "rm %s", path ); 47 | int status = system( rm_command ); 48 | free( rm_command ); 49 | /* NOTE(review): as in file_copy, a non-zero exit status from rm is not treated as an error -- confirm. */ 50 | if( status != -1 ) 51 | { 52 | return FILE_OK; 53 | } 54 | else 55 | { 56 | return FILE_ERROR; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/libplinkio/file.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2012-2013, Mattias Frånberg 3 | * All rights reserved. 4 | * 5 | * This file is distributed under the Modified BSD License. See the COPYING file 6 | * for details. 
7 | */ 8 | 9 | #ifndef __FILE_H__ 10 | #define __FILE_H__ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | /** 17 | * Defines return values from file operations. 18 | */ 19 | enum file_status { 20 | /** 21 | * Means that an operation was successfully performed. 22 | */ 23 | FILE_OK, 24 | 25 | /** 26 | * Means that an error occurred. 27 | */ 28 | FILE_ERROR 29 | }; 30 | 31 | typedef enum file_status file_status_t; 32 | 33 | /** 34 | * Copies a file to another. 35 | * 36 | * @param from_path The path to copy from. 37 | * @param to_path The destination path. 38 | * 39 | * @return FILE_OK if the file was copied, FILE_ERROR otherwise. 40 | */ 41 | file_status_t file_copy(const char *from_path, const char *to_path); 42 | 43 | /** 44 | * Removes the file at the given path. 45 | * 46 | * @param path Path to the file to remove. 47 | * 48 | * @return FILE_OK if the file was removed, FILE_ERROR otherwise. 49 | */ 50 | file_status_t file_remove(const char *path); 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | 56 | #endif /* __FILE_H__ */ 57 | -------------------------------------------------------------------------------- /src/libplinkio/snp_lookup.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2012-2013, Mattias Frånberg 3 | * All rights reserved. 4 | * 5 | * This file is distributed under the Modified BSD License. See the COPYING file 6 | * for details. 7 | */ 8 | 9 | #ifndef __SNP_LOOKUP_H__ 10 | #define __SNP_LOOKUP_H__ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | #if HAVE_ENDIAN_H 17 | #include 18 | #elif HAVE_MACHINE_ENDIAN_H 19 | #include 20 | #elif HAVE_SYS_ENDIAN_H 21 | #include 22 | #endif 23 | 24 | /** 25 | * This file contains a lookup table that maps 26 | * SNPs packed in a single byte into an array of 27 | * four bytes. 28 | */ 29 | union snp_lookup_t 30 | { 31 | /** 32 | * Accessible as an array. 
33 | */ 34 | unsigned char snp_array[4]; 35 | 36 | /** 37 | * Accessible as a block of bytes. 38 | */ 39 | int32_t snp_block; 40 | }; 41 | 42 | #if __BYTE_ORDER == __LITTLE_ENDIAN 43 | #include "snp_lookup_little.h" 44 | #else 45 | #include "snp_lookup_big.h" 46 | #endif /* End test endianess */ 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif /* End of __SNP_LOOKUP_H__ */ 53 | -------------------------------------------------------------------------------- /src/libplinkio/status.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2012-2013, Mattias Frånberg 3 | * All rights reserved. 4 | * 5 | * This file is distributed under the Modified BSD License. See the COPYING file 6 | * for details. 7 | */ 8 | 9 | #ifndef __STATUS_H__ 10 | #define __STATUS_H__ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | enum pio_status_e 17 | { 18 | /** 19 | * Function successful. 20 | */ 21 | PIO_OK, 22 | 23 | /** 24 | * File reached EOF. 25 | */ 26 | PIO_END, 27 | 28 | /** 29 | * Generic error. 30 | */ 31 | PIO_ERROR, 32 | 33 | /** 34 | * FAM IO error. 35 | */ 36 | P_FAM_IO_ERROR, 37 | 38 | /** 39 | * BIM IO error. 40 | */ 41 | P_BIM_IO_ERROR, 42 | 43 | /** 44 | * Bed IO error. 
45 | */ 46 | P_BED_IO_ERROR 47 | }; 48 | 49 | typedef enum pio_status_e pio_status_t; 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | 55 | #endif /* End of __STATUS_H__ */ 56 | -------------------------------------------------------------------------------- /src/sqlite/py2/cache.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cache.c -------------------------------------------------------------------------------- /src/sqlite/py2/cache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cache.h -------------------------------------------------------------------------------- /src/sqlite/py2/connection.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/connection.c -------------------------------------------------------------------------------- /src/sqlite/py2/connection.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/connection.h -------------------------------------------------------------------------------- /src/sqlite/py2/cursor.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cursor.c -------------------------------------------------------------------------------- /src/sqlite/py2/cursor.h: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cursor.h -------------------------------------------------------------------------------- /src/sqlite/py2/module.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/module.c -------------------------------------------------------------------------------- /src/sqlite/py2/module.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/module.h -------------------------------------------------------------------------------- /src/sqlite/py2/prepare_protocol.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/prepare_protocol.c -------------------------------------------------------------------------------- /src/sqlite/py2/prepare_protocol.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/prepare_protocol.h -------------------------------------------------------------------------------- /src/sqlite/py2/row.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/row.c -------------------------------------------------------------------------------- /src/sqlite/py2/row.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/row.h 
-------------------------------------------------------------------------------- /src/sqlite/py2/sqlitecompat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/sqlitecompat.h -------------------------------------------------------------------------------- /src/sqlite/py2/statement.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/statement.c -------------------------------------------------------------------------------- /src/sqlite/py2/statement.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/statement.h -------------------------------------------------------------------------------- /src/sqlite/py2/util.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/util.c -------------------------------------------------------------------------------- /src/sqlite/py2/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/util.h -------------------------------------------------------------------------------- /src/sqlite/py3/prepare_protocol.h: -------------------------------------------------------------------------------- 1 | /* prepare_protocol.h - the protocol for preparing values for SQLite 2 | * 3 | * Copyright (C) 2005-2010 Gerhard Häring 4 | * 5 | * This file is part of pysqlite. 
6 | * 7 | * This software is provided 'as-is', without any express or implied 8 | * warranty. In no event will the authors be held liable for any damages 9 | * arising from the use of this software. 10 | * 11 | * Permission is granted to anyone to use this software for any purpose, 12 | * including commercial applications, and to alter it and redistribute it 13 | * freely, subject to the following restrictions: 14 | * 15 | * 1. The origin of this software must not be misrepresented; you must not 16 | * claim that you wrote the original software. If you use this software 17 | * in a product, an acknowledgment in the product documentation would be 18 | * appreciated but is not required. 19 | * 2. Altered source versions must be plainly marked as such, and must not be 20 | * misrepresented as being the original software. 21 | * 3. This notice may not be removed or altered from any source distribution. 22 | */ 23 | 24 | #ifndef PYSQLITE_PREPARE_PROTOCOL_H 25 | #define PYSQLITE_PREPARE_PROTOCOL_H 26 | #include "Python.h" 27 | 28 | typedef struct 29 | { 30 | PyObject_HEAD 31 | } pysqlite_PrepareProtocol; 32 | 33 | extern PyTypeObject pysqlite_PrepareProtocolType; 34 | 35 | int pysqlite_prepare_protocol_init(pysqlite_PrepareProtocol* self, PyObject* args, PyObject* kwargs); 36 | void pysqlite_prepare_protocol_dealloc(pysqlite_PrepareProtocol* self); 37 | 38 | int pysqlite_prepare_protocol_setup_types(void); 39 | 40 | #define UNKNOWN (-1) 41 | #endif 42 | -------------------------------------------------------------------------------- /src/sqlite/py3/row.h: -------------------------------------------------------------------------------- 1 | /* row.h - an enhanced tuple for database rows 2 | * 3 | * Copyright (C) 2005-2010 Gerhard Häring 4 | * 5 | * This file is part of pysqlite. 6 | * 7 | * This software is provided 'as-is', without any express or implied 8 | * warranty. 
In no event will the authors be held liable for any damages 9 | * arising from the use of this software. 10 | * 11 | * Permission is granted to anyone to use this software for any purpose, 12 | * including commercial applications, and to alter it and redistribute it 13 | * freely, subject to the following restrictions: 14 | * 15 | * 1. The origin of this software must not be misrepresented; you must not 16 | * claim that you wrote the original software. If you use this software 17 | * in a product, an acknowledgment in the product documentation would be 18 | * appreciated but is not required. 19 | * 2. Altered source versions must be plainly marked as such, and must not be 20 | * misrepresented as being the original software. 21 | * 3. This notice may not be removed or altered from any source distribution. 22 | */ 23 | 24 | #ifndef PYSQLITE_ROW_H 25 | #define PYSQLITE_ROW_H 26 | #include "Python.h" 27 | 28 | typedef struct _Row 29 | { 30 | PyObject_HEAD 31 | PyObject* data; 32 | PyObject* description; 33 | } pysqlite_Row; 34 | 35 | extern PyTypeObject pysqlite_RowType; 36 | 37 | int pysqlite_row_setup_types(void); 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/sqlite/py3/util.h: -------------------------------------------------------------------------------- 1 | /* util.h - various utility functions 2 | * 3 | * Copyright (C) 2005-2010 Gerhard Häring 4 | * 5 | * This file is part of pysqlite. 6 | * 7 | * This software is provided 'as-is', without any express or implied 8 | * warranty. In no event will the authors be held liable for any damages 9 | * arising from the use of this software. 10 | * 11 | * Permission is granted to anyone to use this software for any purpose, 12 | * including commercial applications, and to alter it and redistribute it 13 | * freely, subject to the following restrictions: 14 | * 15 | * 1. 
The origin of this software must not be misrepresented; you must not 16 | * claim that you wrote the original software. If you use this software 17 | * in a product, an acknowledgment in the product documentation would be 18 | * appreciated but is not required. 19 | * 2. Altered source versions must be plainly marked as such, and must not be 20 | * misrepresented as being the original software. 21 | * 3. This notice may not be removed or altered from any source distribution. 22 | */ 23 | 24 | #ifndef PYSQLITE_UTIL_H 25 | #define PYSQLITE_UTIL_H 26 | #include "Python.h" 27 | #include "pythread.h" 28 | #include "sqlite3.h" 29 | #include "connection.h" 30 | 31 | int pysqlite_step(sqlite3_stmt* statement, pysqlite_Connection* connection); 32 | 33 | /** 34 | * Checks the SQLite error code and sets the appropriate DB-API exception. 35 | * Returns the error code (0 means no error occurred). 36 | */ 37 | int _pysqlite_seterror(sqlite3* db, sqlite3_stmt* st); 38 | #endif 39 | -------------------------------------------------------------------------------- /src/ucsc/inc/aliType.h: -------------------------------------------------------------------------------- 1 | /* aliType - some definitions for type of alignment. */ 2 | 3 | #ifndef ALITYPE_H 4 | #define ALITYPE_H 5 | 6 | enum gfType 7 | /* Types of sequence genoFind deals with. */ 8 | { 9 | gftDna = 0, /* DNA (genomic) */ 10 | gftRna = 1, /* RNA */ 11 | gftProt = 2, /* Protein. */ 12 | gftDnaX = 3, /* Genomic DNA translated to protein */ 13 | gftRnaX = 4, /* RNA translated to protein */ 14 | }; 15 | 16 | char *gfTypeName(enum gfType type); 17 | /* Return string representing type. */ 18 | 19 | enum gfType gfTypeFromName(char *name); 20 | /* Return type from string. */ 21 | 22 | enum ffStringency 23 | /* How tight of a match is required. */ 24 | { 25 | ffExact = 0, /* Only an exact match will do. */ 26 | 27 | ffCdna = 1, /* Near exact. Tolerate long gaps in target (genomic) */ 28 | ffTight = 2, /* Near exact. 
Not so tolerant of long gaps in target. */ 29 | ffLoose = 3, /* Less exact. */ 30 | }; 31 | 32 | #endif /* ALITYPE_H */ 33 | -------------------------------------------------------------------------------- /src/ucsc/inc/base64.h: -------------------------------------------------------------------------------- 1 | /* Base64 encoding and decoding. 2 | * by Galt Barber */ 3 | 4 | #ifndef BASE64_H 5 | #define BASE64_H 6 | 7 | #define B64CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 8 | 9 | char *base64Encode(char *input, size_t inplen); 10 | /* Use base64 to encode a string. Returns one long encoded 11 | * string which needs to be freeMem'd. Note: big-endian algorithm. 12 | * For some applications you may need to break the base64 output 13 | * of this function into lines no longer than 76 chars. 14 | */ 15 | 16 | boolean base64Validate(char *input); 17 | /* Return true if input is valid base64. 18 | * Note that the input string is changed by 19 | * eraseWhiteSpace(). */ 20 | 21 | char *base64Decode(char *input, size_t *returnSize); 22 | /* Use base64 to decode a string. Return decoded 23 | * string which will be freeMem'd. Note: big-endian algorithm. 24 | * Call eraseWhiteSpace() and check for invalid input 25 | * before passing in input if needed. 26 | * Optionally set return size for use with binary data. 27 | */ 28 | 29 | #endif /* BASE64_H */ 30 | -------------------------------------------------------------------------------- /src/ucsc/inc/filePath.h: -------------------------------------------------------------------------------- 1 | /* filePath - stuff to handle file name parsing. */ 2 | #ifndef FILEPATH_H 3 | #define FILEPATH_H 4 | 5 | #include "common.h" 6 | 7 | void splitPath(char *path, char dir[PATH_LEN], char name[FILENAME_LEN], 8 | char extension[FILEEXT_LEN]); 9 | /* Split a full path into components. The dir component will include the 10 | * trailing / if any. The extension component will include the starting 11 | * . if any. 
Pass in NULL for dir, name, or extension if you don't care about 12 | * that part. */ 13 | 14 | char *expandRelativePath(char *baseDir, char *relPath); 15 | /* Expand relative path to more absolute one. */ 16 | 17 | char *pathRelativeToFile(char *baseFile, char *relPath); 18 | /* Given a base file name and a path relative to that, return 19 | * relative path interpreted as if it were seen from the 20 | * same directory holding the baseFile. 21 | * An example of using this would be in processing include 22 | * files. In this case the baseFile would be the current 23 | * source file, and the relPath would be from the include 24 | * statement. The returned result could then be used to 25 | * open the include file. */ 26 | 27 | void undosPath(char *path); 28 | /* Convert '\' to '/' in path. (DOS/Windows is typically ok with 29 | * this actually.) */ 30 | 31 | #endif /* FILEPATH_H */ 32 | -------------------------------------------------------------------------------- /src/ucsc/inc/gfxPoly.h: -------------------------------------------------------------------------------- 1 | /* gfxPoly - two dimensional polygon. */ 2 | 3 | #ifndef GFXPOLY_H 4 | #define GFXPOLY_H 5 | 6 | struct gfxPoint 7 | /* A two-dimensional point, typically in pixel coordinates. */ 8 | { 9 | struct gfxPoint *next; 10 | int x, y; /* Position */ 11 | }; 12 | 13 | struct gfxPoly 14 | /* A two-dimensional polygon */ 15 | { 16 | struct gfxPoly *next; 17 | int ptCount; /* Number of points. */ 18 | struct gfxPoint *ptList; /* First point in list, which is circular. */ 19 | struct gfxPoint *lastPoint; /* Last point in list. */ 20 | }; 21 | 22 | struct gfxPoly *gfxPolyNew(); 23 | /* Create new (empty) polygon */ 24 | 25 | void gfxPolyFree(struct gfxPoly **pPoly); 26 | /* Free up resources associated with polygon */ 27 | 28 | void gfxPolyAddPoint(struct gfxPoly *poly, int x, int y); 29 | /* Add point to polygon. 
*/ 30 | 31 | #endif /* GFXPOLY_H */ 32 | -------------------------------------------------------------------------------- /src/ucsc/inc/hmmstats.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* hmmstats.h - Stuff for doing statistical analysis in general and 7 | * hidden Markov models in particular. */ 8 | #ifndef HMMSTATS_H 9 | #define HMMSTATS_H 10 | 11 | int scaledLog(double val); 12 | /* Return scaled log of val. */ 13 | 14 | #define logScaleFactor 1000 15 | /* Amount we scale logs by. */ 16 | 17 | double simpleGaussean(double x); 18 | /* Gaussean distribution with standard deviation 1 and mean 0. */ 19 | 20 | double gaussean(double x, double mean, double sd); 21 | /* Gaussean distribution with mean and standard deviation at point x */ 22 | 23 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n); 24 | /* Calculate variance. */ 25 | 26 | double calcStdFromSums(double sum, double sumSquares, bits64 n); 27 | /* Calculate standard deviation. */ 28 | 29 | #endif /* HMMSTATS_H */ 30 | -------------------------------------------------------------------------------- /src/ucsc/inc/https.h: -------------------------------------------------------------------------------- 1 | /* Connect via https. */ 2 | 3 | #ifndef NET_HTTPS_H 4 | #define NET_HTTPS_H 5 | 6 | int netConnectHttps(char *hostName, int port); 7 | /* Return socket for https connection with server or -1 if error. 
*/ 8 | 9 | #endif//ndef NET_HTTPS_H 10 | -------------------------------------------------------------------------------- /src/ucsc/inc/internet.h: -------------------------------------------------------------------------------- 1 | /* internet - some stuff for routines that use the internet 2 | * and aren't afraid to include some internet specific structures 3 | * and the like. See also net for stuff that is higher level. */ 4 | /* NOTE(review): the #ifndef INTERNET_H below is closed by the #endif at the end of this header, but no #define INTERNET_H appears in between, so the guard does not stop a second inclusion -- confirm and add the define. */ 5 | #ifndef INTERNET_H 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | bits32 internetHostIp(char *hostName); 12 | /* Get IP v4 address (in host byte order) for hostName. 13 | * Warn and return 0 if there's a problem. */ 14 | 15 | boolean internetFillInAddress(char *hostName, int port, 16 | struct sockaddr_in *address); 17 | /* Fill in address. Warn and return FALSE if can't. */ 18 | 19 | boolean internetIpToDottedQuad(bits32 ip, char dottedQuad[17]); 20 | /* Convert IP4 address in host byte order to dotted quad 21 | * notation. Warn and return FALSE if there's a 22 | * problem. */ 23 | 24 | boolean internetDottedQuadToIp(char *dottedQuad, bits32 *retIp); 25 | /* Convert dotted quad format address to IP4 address in 26 | * host byte order. Warn and return FALSE if there's a 27 | * problem. */ 28 | 29 | boolean internetIsDottedQuad(char *s); 30 | /* Returns TRUE if it looks like s is a dotted quad. */ 31 | 32 | void internetParseDottedQuad(char *dottedQuad, unsigned char quad[4]); 33 | /* Parse dotted quads into quad */ 34 | 35 | void internetUnpackIp(bits32 packed, unsigned char unpacked[4]); 36 | /* Convert from 32 bit to 4-byte format with most significant 37 | * byte first. */ 38 | 39 | boolean internetIpInSubnet(unsigned char unpackedIp[4], 40 | unsigned char subnet[4]); 41 | /* Return true if unpacked IP address is in subnet. 
*/ 42 | 43 | #endif /* INTERNET_H */ 44 | -------------------------------------------------------------------------------- /src/ucsc/inc/memalloc.h: -------------------------------------------------------------------------------- 1 | /* Let the user redirect where memory allocation/deallocation 2 | * happens. 'careful' routines help debug scrambled heaps. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #ifndef MEMALLOC_H 8 | #define MEMALLOC_H 9 | 10 | struct memHandler 11 | { 12 | struct memHandler *next; 13 | void * (*alloc)(size_t size); 14 | void (*free)(void *vpt); 15 | void * (*realloc)(void* vpt, size_t size); 16 | }; 17 | 18 | struct memHandler *pushMemHandler(struct memHandler *newHandler); 19 | /* Use newHandler for memory requests until matching popMemHandler. 20 | * Returns previous top of memory handler stack. */ 21 | 22 | struct memHandler *popMemHandler(); 23 | /* Removes top element from memHandler stack and returns it. */ 24 | 25 | void setDefaultMemHandler(); 26 | /* Sets memHandler to the default. */ 27 | 28 | void pushCarefulMemHandler(size_t maxAlloc); 29 | /* Push the careful (paranoid, conservative, checks everything) 30 | * memory handler onto the top of the memHandler stack and use it. */ 31 | 32 | void carefulCheckHeap(); 33 | /* Walk through allocated memory and make sure that all cookies are 34 | * in place. Only walks through what's been done since 35 | * pushCarefulMemHandler(). */ 36 | 37 | int carefulCountBlocksAllocated(); 38 | /* How many memory items are allocated? (Since 39 | * pushCarefulMemHandler() was called.) */ 40 | 41 | size_t carefulTotalAllocated(); 42 | /* Return total bytes allocated */ 43 | 44 | void setMaxAlloc(size_t s); 45 | /* Set large allocation limit. */ 46 | 47 | void memTrackerStart(); 48 | /* Push memory handler that will track blocks allocated so that 49 | * they can be automatically released with memTrackerEnd(). 
*/ 50 | 51 | void memTrackerEnd(); 52 | /* Free any remaining blocks and pop tracker memory handler. */ 53 | 54 | #endif /* MEMALLOC_H */ 55 | -------------------------------------------------------------------------------- /src/ucsc/inc/regexHelper.h: -------------------------------------------------------------------------------- 1 | /* regexHelper: easy wrappers on POSIX Extended Regular Expressions (man 7 regex, man 3 regex) */ 2 | 3 | #ifndef REGEXHELPER_H 4 | #define REGEXHELPER_H 5 | 6 | #include "common.h" 7 | #include 8 | 9 | const regex_t *regexCompile(const char *exp, const char *description, int compileFlags); 10 | /* Compile exp (or die with an informative-as-possible error message). 11 | * Cache pre-compiled regex's internally (so don't free result after use). */ 12 | 13 | boolean regexMatch(const char *string, const char *exp); 14 | /* Return TRUE if string matches regular expression exp (case sensitive). */ 15 | 16 | boolean regexMatchNoCase(const char *string, const char *exp); 17 | /* Return TRUE if string matches regular expression exp (case insensitive). */ 18 | 19 | boolean regexMatchSubstr(const char *string, const char *exp, 20 | regmatch_t substrArr[], size_t substrArrSize); 21 | /* Return TRUE if string matches regular expression exp (case sensitive); 22 | * regexec fills in substrArr with substring offsets. */ 23 | 24 | boolean regexMatchSubstrNoCase(const char *string, const char *exp, 25 | regmatch_t substrArr[], size_t substrArrSize); 26 | /* Return TRUE if string matches regular expression exp (case insensitive); 27 | * regexec fills in substrArr with substring offsets. */ 28 | 29 | #endif // REGEXHELPER_H 30 | -------------------------------------------------------------------------------- /src/ucsc/inc/verbose.h: -------------------------------------------------------------------------------- 1 | /* verbose.h - write out status messages according to the 2 | * current verbosity level. These messages go to stderr. 
*/ 3 | 4 | #ifndef VERBOSE_H 5 | #define VERBOSE_H 6 | 7 | void verbose(int verbosity, char *format, ...) 8 | /* Write printf formatted message to log (which by 9 | * default is stderr) if global verbose variable 10 | * is set to verbosity or higher. */ 11 | #if defined(__GNUC__) 12 | __attribute__((format(printf, 2, 3))) 13 | #endif 14 | ; 15 | 16 | void verboseVa(int verbosity, char *format, va_list args); 17 | /* Log with at given verbosity vprintf formatted args. */ 18 | 19 | void verboseTimeInit(void); 20 | /* Initialize or reinitialize the previous time for use by verboseTime. */ 21 | 22 | void verboseTime(int verbosity, char *label, ...) 23 | /* Print label and how long it's been since last call. Start time can be 24 | * initialized with verboseTimeInit, otherwise the elapsed time will be 25 | * zero. */ 26 | #if defined(__GNUC__) 27 | __attribute__((format(printf, 2, 3))) 28 | #endif 29 | ; 30 | 31 | void verboseDot(); 32 | /* Write I'm alive dot (at verbosity level 1) */ 33 | 34 | boolean verboseDotsEnabled(); 35 | /* check if outputting of happy dots are enabled. They will be enabled if the 36 | * verbosity is > 0, stderr is a tty and we don't appear to be running an 37 | * emacs shell. */ 38 | 39 | int verboseLevel(void); 40 | /* Get verbosity level. */ 41 | 42 | void verboseSetLevel(int verbosity); 43 | /* Set verbosity level in log. 0 for no logging, 44 | * higher number for increasing verbosity. */ 45 | 46 | void verboseSetLogFile(char *name); 47 | /* Set logFile for verbose messages overrides stderr. */ 48 | 49 | FILE *verboseLogFile(); 50 | /* Get the verbose log file. */ 51 | 52 | #endif /* VERBOSE_H */ 53 | -------------------------------------------------------------------------------- /src/ucsc/inc/zlibFace.h: -------------------------------------------------------------------------------- 1 | /* Wrappers around zlib to make interfacing to it a bit easier. 
*/ 2 | 3 | #ifndef ZLIBFACE_H 4 | #define ZLIBFACE_H 5 | 6 | size_t zCompress( 7 | void *uncompressed, /* Start of area to compress. */ 8 | size_t uncompressedSize, /* Size of area to compress. */ 9 | void *compBuf, /* Where to put compressed bits */ 10 | size_t compBufSize); /* Size of compressed bits - calculate using zCompBufSize */ 11 | /* Compress data from memory to memory. Returns size after compression. */ 12 | 13 | size_t zCompBufSize(size_t uncompressedSize); 14 | /* Return size of buffer needed to compress something of given size uncompressed. */ 15 | 16 | size_t zUncompress( 17 | void *compressed, /* Compressed area */ 18 | size_t compressedSize, /* Size after compression */ 19 | void *uncompBuf, /* Where to put uncompressed bits */ 20 | size_t uncompBufSize); /* Max size of uncompressed bits. */ 21 | /* Uncompress data from memory to memory. Returns size after decompression. */ 22 | 23 | void zSelfTest(int count); 24 | /* Run an internal diagnostic. */ 25 | 26 | #endif /* ZLIBFACE_H */ 27 | -------------------------------------------------------------------------------- /src/ucsc/lib/aliType.c: -------------------------------------------------------------------------------- 1 | /* aliType - some definitions for type of alignment. */ 2 | #include "common.h" 3 | #include "aliType.h" 4 | 5 | 6 | char *gfTypeName(enum gfType type) 7 | /* Return string representing type. */ 8 | { 9 | if (type == gftDna) return "DNA"; 10 | if (type == gftRna) return "RNA"; 11 | if (type == gftProt) return "protein"; 12 | if (type == gftDnaX) return "DNAX"; 13 | if (type == gftRnaX) return "RNAX"; 14 | internalErr(); 15 | return NULL; 16 | } 17 | 18 | enum gfType gfTypeFromName(char *name) 19 | /* Return type from string. 
*/ 20 | { 21 | if (sameWord(name, "DNA")) return gftDna; 22 | if (sameWord(name, "RNA")) return gftRna; 23 | if (sameWord(name, "protein")) return gftProt; 24 | if (sameWord(name, "prot")) return gftProt; 25 | if (sameWord(name, "DNAX")) return gftDnaX; 26 | if (sameWord(name, "RNAX")) return gftRnaX; 27 | errAbort("Unknown sequence type '%s'", name); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /src/ucsc/lib/hmmstats.c: -------------------------------------------------------------------------------- 1 | /* hmmstats.c - Stuff for doing statistical analysis in general and 2 | * hidden Markov models in particular. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #include "common.h" 8 | #include "hmmstats.h" 9 | 10 | 11 | int scaledLog(double val) 12 | /* Return scaled log of val. */ 13 | { 14 | return round(logScaleFactor * log(val)); 15 | } 16 | 17 | double oneOverSqrtTwoPi = 0.39894228; 18 | 19 | double simpleGaussean(double x) 20 | /* Gaussean distribution with standard deviation 1 and mean 0. */ 21 | { 22 | return oneOverSqrtTwoPi * exp(-0.5*x*x ); 23 | } 24 | 25 | double gaussean(double x, double mean, double sd) 26 | /* Gaussean distribution with mean and standard deviation at point x */ 27 | { 28 | x -= mean; 29 | x /= sd; 30 | return oneOverSqrtTwoPi * exp(-0.5*x*x) / sd; 31 | } 32 | 33 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n) 34 | /* Calculate variance. */ 35 | { 36 | double var = sumSquares - sum*sum/n; 37 | if (n > 1) 38 | var /= n-1; 39 | return var; 40 | } 41 | 42 | double calcStdFromSums(double sum, double sumSquares, bits64 n) 43 | /* Calculate standard deviation. 
*/ 44 | { 45 | return sqrt(calcVarianceFromSums(sum, sumSquares, n)); 46 | } 47 | -------------------------------------------------------------------------------- /src/ucsc/lib/portimpl.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* Implement portable stuff.... */ 7 | 8 | /* There is one of the following structures for each web server 9 | * we support. During run time looking at the environment variable 10 | * SERVER_SOFTWARE we decide which of these to use. */ 11 | struct webServerSpecific 12 | { 13 | char *name; 14 | 15 | /* Make a good name for a temp file. */ 16 | void (*makeTempName)(struct tempName *tn, char *base, char *suffix); 17 | 18 | /* Return directory to look for cgi in. */ 19 | char * (*cgiDir)(); 20 | 21 | #ifdef NEVER 22 | /* Return cgi suffix. */ 23 | char * (*cgiSuffix)(); 24 | #endif /* NEVER */ 25 | 26 | /* Return relative speed of CPU. (UCSC CSE 1999 FTP machine is 1.0) */ 27 | double (*speed)(); 28 | 29 | /* The relative path to trash directory for CGI binaries */ 30 | char * (*trashDir)(); 31 | 32 | }; 33 | 34 | 35 | extern struct webServerSpecific wssMicrosoftII, wssMicrosoftPWS, wssDefault, 36 | wssLinux, wssCommandLine, wssBrcMcw; 37 | 38 | char *rTempName(char *dir, char *base, char *suffix); 39 | /* Make a temp name that's almost certainly unique. */ 40 | -------------------------------------------------------------------------------- /src/ucsc/lib/servBrcMcw.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for .brc.mcw.edu server goes here. 
2 | * 3 | * This file is copyright 2004 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | #include "hash.h" 11 | 12 | 13 | static char *__trashDir = "/trash"; 14 | 15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 16 | /* Figure out a temp name, and how CGI and HTML will access it. */ 17 | { 18 | char *tname; 19 | 20 | tname = rTempName(__trashDir, base, suffix); 21 | strcpy(tn->forCgi, tname); 22 | strcpy(tn->forHtml, tname); 23 | } 24 | 25 | static char *_cgiDir() 26 | { 27 | return "/cgi-bin/"; 28 | } 29 | 30 | static char *_trashDir() 31 | { 32 | return __trashDir; 33 | } 34 | 35 | static double _speed() 36 | { 37 | return 3.0; 38 | } 39 | 40 | struct webServerSpecific wssBrcMcw = 41 | { 42 | "default", 43 | _makeTempName, 44 | _cgiDir, 45 | _speed, 46 | _trashDir, 47 | }; 48 | -------------------------------------------------------------------------------- /src/ucsc/lib/servCrunx.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for local linux server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "/home/httpd/html/trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. 
*/ 16 | { 17 | char *tname; 18 | char *tempDirCgi = __trashDir; 19 | char *tempDirHtml = "/trash"; 20 | int tlcLen = strlen(tempDirCgi); 21 | int tlhLen = strlen(tempDirHtml); 22 | 23 | tname = rTempName(tempDirCgi, base, suffix); 24 | strcpy(tn->forCgi, tname); 25 | memcpy(tn->forHtml, tempDirHtml, tlhLen); 26 | strcpy(tn->forHtml+tlhLen, tn->forCgi+tlcLen); 27 | } 28 | 29 | static char *_cgiDir() 30 | { 31 | return "../cgi-bin/"; 32 | } 33 | 34 | static char *_trashDir() 35 | { 36 | return __trashDir; 37 | } 38 | 39 | static double _speed() 40 | { 41 | return 3.0; 42 | } 43 | 44 | struct webServerSpecific wssLinux = 45 | { 46 | "linux", 47 | _makeTempName, 48 | _cgiDir, 49 | _speed, 50 | _trashDir, 51 | }; 52 | -------------------------------------------------------------------------------- /src/ucsc/lib/servcis.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for Comp Science dept. web server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | #include "hash.h" 11 | 12 | 13 | static char *__trashDir = "../trash"; 14 | 15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 16 | /* Figure out a temp name, and how CGI and HTML will access it. 
*/ 17 | { 18 | char *tname; 19 | 20 | tname = rTempName(__trashDir, base, suffix); 21 | strcpy(tn->forCgi, tname); 22 | strcpy(tn->forHtml, tname); 23 | } 24 | 25 | static char *_cgiDir() 26 | { 27 | return "../cgi-bin/"; 28 | } 29 | 30 | static char *_trashDir() 31 | { 32 | return __trashDir; 33 | } 34 | 35 | static double _speed() 36 | { 37 | return 3.0; 38 | } 39 | 40 | 41 | struct webServerSpecific wssDefault = 42 | { 43 | "default", 44 | _makeTempName, 45 | _cgiDir, 46 | _speed, 47 | _trashDir, 48 | }; 49 | -------------------------------------------------------------------------------- /src/ucsc/lib/servcl.c: -------------------------------------------------------------------------------- 1 | /* "Web Server" for command line execution. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "."; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. 
*/ 16 | { 17 | char *tname = rTempName(__trashDir, base, suffix); 18 | strcpy(tn->forCgi, tname); 19 | strcpy(tn->forHtml, tn->forCgi); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | char *jkwebDir; 25 | if ((jkwebDir = getenv("JKWEB")) == NULL) 26 | return ""; 27 | else 28 | return jkwebDir; 29 | } 30 | 31 | static char *_trashDir() 32 | { 33 | return __trashDir; 34 | } 35 | 36 | static double _speed() 37 | { 38 | return 1.0; 39 | } 40 | 41 | 42 | struct webServerSpecific wssCommandLine = 43 | { 44 | "commandLine", 45 | _makeTempName, 46 | _cgiDir, 47 | _speed, 48 | _trashDir, 49 | }; 50 | -------------------------------------------------------------------------------- /src/ucsc/lib/servmsII.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for the MS II Web Server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "..\\trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. 
*/ 16 | { 17 | long tempIx = incCounterFile("tcounter"); 18 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 19 | sprintf(tn->forHtml, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | return ""; 25 | } 26 | 27 | static char *_trashDir() 28 | { 29 | return __trashDir; 30 | } 31 | 32 | static double _speed() 33 | { 34 | return 2.5; 35 | } 36 | 37 | 38 | struct webServerSpecific wssMicrosoftII = 39 | { 40 | "Microsoft-IIS", 41 | _makeTempName, 42 | _cgiDir, 43 | _speed, 44 | _trashDir, 45 | }; 46 | -------------------------------------------------------------------------------- /src/ucsc/lib/servpws.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for the Personal Web Server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "..\\trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. 
*/ 16 | { 17 | long tempIx = incCounterFile("tcounter"); 18 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 19 | sprintf(tn->forHtml, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | return "../cgi-bin/"; 25 | } 26 | 27 | static char *_trashDir() 28 | { 29 | return __trashDir; 30 | } 31 | 32 | static double _speed() 33 | { 34 | return 1.25; 35 | } 36 | 37 | struct webServerSpecific wssMicrosoftPWS = 38 | { 39 | "Microsoft-PWS", 40 | _makeTempName, 41 | _cgiDir, 42 | _speed, 43 | _trashDir, 44 | }; 45 | -------------------------------------------------------------------------------- /src/ucsc/samtools/sam_header.h: -------------------------------------------------------------------------------- 1 | #ifndef __SAM_HEADER_H__ 2 | #define __SAM_HEADER_H__ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void *sam_header_parse2(const char *headerText); 9 | void *sam_header_merge(int n, const void **dicts); 10 | void sam_header_free(void *header); 11 | char *sam_header_write(const void *headerDict); // returns a newly allocated string 12 | 13 | /* 14 | // Usage example 15 | const char *key, *val; 16 | void *iter = sam_header_parse2(bam->header->text); 17 | while ( iter = sam_header_key_val(iter, "RG","ID","SM" &key,&val) ) printf("%s\t%s\n", key,val); 18 | */ 19 | void *sam_header2key_val(void *iter, const char type[2], const char key_tag[2], const char value_tag[2], const char **key, const char **value); 20 | char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n); 21 | 22 | /* 23 | // Usage example 24 | int i, j, n; 25 | const char *tags[] = {"SN","LN","UR","M5",NULL}; 26 | void *dict = sam_header_parse2(bam->header->text); 27 | char **tbl = sam_header2tbl_n(h->dict, "SQ", tags, &n); 28 | for (i=0; i 5 | 6 | static inline int bam_is_big_endian() 7 | { 8 | long one= 1; 9 | return !(*((char *)(&one))); 10 | } 11 | static inline uint16_t 
bam_swap_endian_2(uint16_t v) 12 | { 13 | return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); 14 | } 15 | static inline void *bam_swap_endian_2p(void *x) 16 | { 17 | *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); 18 | return x; 19 | } 20 | static inline uint32_t bam_swap_endian_4(uint32_t v) 21 | { 22 | v = ((v & 0x0000FFFFU) << 16) | (v >> 16); 23 | return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); 24 | } 25 | static inline void *bam_swap_endian_4p(void *x) 26 | { 27 | *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); 28 | return x; 29 | } 30 | static inline uint64_t bam_swap_endian_8(uint64_t v) 31 | { 32 | v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); 33 | v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); 34 | return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); 35 | } 36 | static inline void *bam_swap_endian_8p(void *x) 37 | { 38 | *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); 39 | return x; 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/ucsc/tabix/knetfile.h: -------------------------------------------------------------------------------- 1 | #ifndef KNETFILE_H 2 | #define KNETFILE_H 3 | 4 | #include 5 | #include 6 | 7 | #ifndef _WIN32 8 | #define netread(fd, ptr, len) read(fd, ptr, len) 9 | #define netwrite(fd, ptr, len) write(fd, ptr, len) 10 | #define netclose(fd) close(fd) 11 | #else 12 | #include 13 | #define netread(fd, ptr, len) recv(fd, ptr, len, 0) 14 | #define netwrite(fd, ptr, len) send(fd, ptr, len, 0) 15 | #define netclose(fd) closesocket(fd) 16 | #endif 17 | 18 | // FIXME: currently I/O is unbuffered 19 | 20 | #define KNF_TYPE_LOCAL 1 21 | #define KNF_TYPE_FTP 2 22 | #define KNF_TYPE_HTTP 3 23 | 24 | typedef struct knetFile_s { 25 | int type, fd; 26 | int64_t offset; 27 | char *host, *port; 28 | 29 | // the following are for FTP only 30 | int ctrl_fd, pasv_ip[4], pasv_port, 
max_response, no_reconnect, is_ready; 31 | char *response, *retr, *size_cmd; 32 | int64_t seek_offset; // for lazy seek 33 | int64_t file_size; 34 | 35 | // the following are for HTTP only 36 | char *path, *http_host; 37 | } knetFile; 38 | 39 | #define knet_tell(fp) ((fp)->offset) 40 | #define knet_fileno(fp) ((fp)->fd) 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | #ifdef _WIN32 47 | int knet_win32_init(); 48 | void knet_win32_destroy(); 49 | #endif 50 | 51 | knetFile *knet_open(const char *fn, const char *mode); 52 | 53 | /* 54 | This only works with local files. 55 | */ 56 | knetFile *knet_dopen(int fd, const char *mode); 57 | 58 | /* 59 | If ->is_ready==0, this routine updates ->fd; otherwise, it simply 60 | reads from ->fd. 61 | */ 62 | off_t knet_read(knetFile *fp, void *buf, off_t len); 63 | 64 | /* 65 | This routine only sets ->offset and ->is_ready=0. It does not 66 | communicate with the FTP server. 67 | */ 68 | off_t knet_seek(knetFile *fp, int64_t off, int whence); 69 | int knet_close(knetFile *fp); 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /src/ucsc/tabix/kstring.h: -------------------------------------------------------------------------------- 1 | #ifndef KSTRING_H 2 | #define KSTRING_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef kroundup32 9 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 10 | #endif 11 | 12 | #ifndef KSTRING_T 13 | #define KSTRING_T kstring_t 14 | typedef struct __kstring_t { 15 | size_t l, m; 16 | char *s; 17 | } kstring_t; 18 | #endif 19 | 20 | int ksprintf(kstring_t *s, const char *fmt, ...); 21 | int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); 22 | 23 | // calculate the auxiliary array, allocated by calloc() 24 | int *ksBM_prep(const uint8_t *pat, int m); 25 | 26 | /* Search pat in str and returned the list 
of matches. The size of the 27 | * list is returned as n_matches. _prep is the array returned by 28 | * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */ 29 | int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches); 30 | 31 | static inline int kputsn(const char *p, int l, kstring_t *s) 32 | { 33 | if (s->l + l + 1 >= s->m) { 34 | s->m = s->l + l + 2; 35 | kroundup32(s->m); 36 | s->s = (char*)realloc(s->s, s->m); 37 | } 38 | strncpy(s->s + s->l, p, l); 39 | s->l += l; 40 | s->s[s->l] = 0; 41 | return l; 42 | } 43 | 44 | static inline int kputs(const char *p, kstring_t *s) 45 | { 46 | return kputsn(p, strlen(p), s); 47 | } 48 | 49 | static inline int kputc(int c, kstring_t *s) 50 | { 51 | if (s->l + 1 >= s->m) { 52 | s->m = s->l + 2; 53 | kroundup32(s->m); 54 | s->s = (char*)realloc(s->s, s->m); 55 | } 56 | s->s[s->l++] = c; 57 | s->s[s->l] = 0; 58 | return c; 59 | } 60 | 61 | static inline int *ksplit(kstring_t *s, int delimiter, int *n) 62 | { 63 | int max = 0, *offsets = 0; 64 | *n = ksplit_core(s->s, delimiter, &max, &offsets); 65 | return offsets; 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /src/variant_tools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This file is part of variant_tools, a software application to annotate, 4 | # summarize, and filter variants for next-gen sequencing ananlysis. 5 | # Please visit https://github.com/vatlab/varianttools for details. 6 | # 7 | # Copyright (C) 2011 - 2020 - 2013 Bo Peng (bpeng@mdanderson.org) 8 | # 9 | # This program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 
13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 21 | # 22 | -------------------------------------------------------------------------------- /src/variant_tools/_version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This file is part of variant_tools, a software application to annotate, 4 | # summarize, and filter variants for next-gen sequencing analysis. 5 | # Please visit https://github.com/vatlab/varianttools for details. 6 | # 7 | # Copyright (C) 2011 - 2020 - 2013 Bo Peng (bpeng@mdanderson.org) 8 | # 9 | # This program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 
21 | # 22 | 23 | import sys 24 | 25 | VTOOLS_VERSION = '3.1.4' 26 | 27 | pyver = sys.version_info 28 | VTOOLS_FULL_VERSION = '{} for Python {}.{}.{}'.format(VTOOLS_VERSION, 29 | pyver.major, pyver.minor, 30 | pyver.micro) 31 | VTOOLS_COPYRIGHT = '''variant tools {} : Copyright (c) 2011 - 2016 Bo Peng'''.format( 32 | VTOOLS_VERSION) 33 | VTOOLS_CONTACT = '''Please visit https://github.com/vatlab/varianttools for more information.''' 34 | -------------------------------------------------------------------------------- /src/variant_tools/checking_asso_result.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def checking_association(sql_file, hdf5_file): 4 | with open(str(sql_file)) as f1: 5 | result_sql = {} 6 | for line in f1: 7 | fields = line.split() 8 | if len(fields) == 7: 9 | result_sql[fields[0]] = fields[1:7] 10 | 11 | with open(str(hdf5_file)) as f2: 12 | result_h5 = {} 13 | for line in f2: 14 | fields = line.split() 15 | if len(fields) == 7: 16 | result_h5[fields[0]] = fields[1:7] 17 | 18 | count=0 19 | 20 | for key in result_h5 : 21 | if result_h5[key] != result_sql[key]: 22 | count+=1 23 | print("Values in %s are different:\n" % (key)," %s \n" % (result_sql["refgene_name2"]) ," in sql: %s \n in hdf5: %s" % (result_sql[key], result_h5[key])) 24 | 25 | if count!=0: 26 | print("there are total %d different results." 
% count) 27 | else: 28 | print("All result are the same!") 29 | 30 | if __name__ == "__main__": 31 | 32 | parser = argparse.ArgumentParser(description="the two files path and name, for sqlite and -for hdf5") 33 | parser.add_argument("-sql", 34 | help="for sqlite and -for sqlite") 35 | parser.add_argument("-h5", 36 | help="for hdf5 and -for hdf5") 37 | 38 | args = parser.parse_args() 39 | checking_association(args.sql, args.h5) 40 | -------------------------------------------------------------------------------- /src/variant_tools/genotypes.h: -------------------------------------------------------------------------------- 1 | #include 2 | #ifndef _GENO_H 3 | #define _GENO_H 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | void get_Genotypes(char* chr, int variant_id,int* samples,int numberOfSamples, char* genoFilter, int* sample_IDs); 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | #endif 12 | -------------------------------------------------------------------------------- /src/variant_tools/vtools_association_cluster.lsf: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #BSUB -W 1:00 3 | #BSUB -J vtools_asociation_cluster 4 | #BSUB -n 32 5 | #BSUB -N 6 | #BSUB -M 16384 7 | #BSUB -R "span[ptile=8]" 8 | #BSUB -q short 9 | 10 | 11 | [[ -z "${PROJECTFOLDER}" ]] && PROJECTFOLDER='path to project folder' || PROJECTFOLDER="${PROJECTFOLDER}" 12 | COMMAND='vtools associate variant disease --discard_variants %(NA)>0.1 --method "BurdenBt --name BurdenTest --alternative 2" --group_by refgene.name2 -j 8 -v 2 -mpi' 13 | NUMBER_OF_PROCESSES=24 14 | 15 | #LSB_HOSTS has the name of all the nodes running the jobs. 16 | NODE_LIST=($LSB_HOSTS) 17 | 18 | #The main program is running on the node with $HOSTNAME, the rest of nodes are saved into $WORKDER_NODES and written into hostlist.txt. 19 | WORKER_NODES=() 20 | for node in "${NODE_LIST[@]}"; 21 | do 22 | if [ "$node" != "$HOSTNAME" ]; then 23 | if [[ ! 
" ${WORKER_NODES[@]} " =~ " ${node} " ]]; then 24 | WORKER_NODES+=($node) 25 | fi 26 | fi 27 | done 28 | 29 | export ZEROMQIP=$(hostname --ip-address) 30 | export PROJECTFOLDER 31 | HOSTFILE="$PROJECTFOLDER/hostlist.txt" 32 | 33 | rm -rf $HOSTFILE 34 | for node in "${WORKER_NODES[@]}"; 35 | do 36 | echo "$node slots=8" >> $HOSTFILE 37 | done 38 | 39 | #Get the path for mpiexec 40 | MPIEXECPATH=$(which mpiexec) 41 | 42 | #The IP address for the main node $ZEROMQIP and $PROJECTFOLDER are needed for the woker script to communicate. 43 | $MPIEXECPATH -d -x PATH -H $HOSTNAME -np 1 -wdir $PROJECTFOLDER $COMMAND : -x ZEROMQIP -x PROJECTFOLDER -x PATH -hostfile $HOSTFILE -np $NUMBER_OF_PROCESSES worker_run 44 | -------------------------------------------------------------------------------- /src/variant_tools/vtools_association_cluster.pbs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #PBS -l nodes=4:lowmem:ppn=8,walltime=01:00:00 3 | #PBS -V 4 | #PBS -q short 5 | 6 | 7 | [[ -z "${PROJECTFOLDER}" ]] && PROJECTFOLDER='path to project folder' || PROJECTFOLDER="${PROJECTFOLDER}" 8 | COMMAND='vtools associate variant disease --discard_variants %(NA)>0.1 --method "BurdenBt --name BurdenTest --alternative 2" --group_by refgene.name2 -j 8 -v 2 -mpi' 9 | NUMBER_OF_PROCESSES_PER_NODE=8 10 | 11 | #PBS_NODEFILE has the name of all the nodes running the jobs. 12 | NODE_LIST=`cat $PBS_NODEFILE | uniq` 13 | nodes=(`echo $NODE_LIST | cut -d " " --output-delimiter=" " -f 1-`) 14 | 15 | #The main program is running on the node with $HOSTNAME, the rest of nodes are saved into $WORKDER_NODES and written into hostlist.txt. 
16 | WORKER_NODES=() 17 | for node in "${nodes[@]}" 18 | do 19 | if [ "$node" != "$HOSTNAME" ]; then 20 | WORKER_NODES+=($node) 21 | fi 22 | done 23 | 24 | export ZEROMQIP=$(ifconfig | sed -En 's/127.0.0.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p') 25 | export PROJECTFOLDER 26 | HOSTFILE="$PROJECTFOLDER/hostlist.txt" 27 | 28 | rm -rf $HOSTFILE 29 | for node in "${WORKER_NODES[@]}"; 30 | do 31 | echo $node >> $HOSTFILE 32 | done 33 | 34 | #Get the path for mpiexec 35 | MPIEXECPATH=$(which mpiexec) 36 | #The IP address for the main node $ZEROMQIP and $PROJECTFOLDER are needed for the woker script to communicate. 37 | $MPIEXECPATH -d -x PATH -H $HOSTNAME -np 1 -wdir $PROJECTFOLDER $COMMAND : -x ZEROMQIP -x PROJECTFOLDER -x PATH -hostfile $HOSTFILE -npernode $NUMBER_OF_PROCESSES_PER_NODE worker_run 38 | -------------------------------------------------------------------------------- /test/ann/testNSFP.DB.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/ann/testNSFP.DB.gz -------------------------------------------------------------------------------- /test/ann/testNSFP.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/ann/testNSFP.zip -------------------------------------------------------------------------------- /test/ann/testThousandGenomes.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/ann/testThousandGenomes.zip -------------------------------------------------------------------------------- /test/fmt/basic_hg18.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 
| # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [format description] 8 | description=A basic variant input format with four columns: chr, pos, ref, alt. 9 | variant=chr,%(pos)s,%(ref)s,%(alt)s 10 | 11 | [DEFAULT] 12 | pos=pos 13 | pos_comment=Field for position. To export indel, set it to 'pos-length(upstream)' 14 | 15 | ref=ref 16 | ref_comment=Field for reference allele. 17 | 18 | alt=alt 19 | alt_comment=Field for alternative allele. 20 | 21 | [chr] 22 | index=1 23 | type=VARCHAR(20) 24 | adj=RemoveLeading('chr') 25 | comment=Chromosome 26 | 27 | [pos] 28 | index=2 29 | type=INTEGER NOT NULL 30 | comment=1-based position, hg18 31 | 32 | [ref] 33 | index=4 34 | type=VARCHAR(255) 35 | comment=Reference allele, '-' for insertion. 36 | 37 | [alt] 38 | index=5 39 | type=VARCHAR(255) 40 | comment=Alternative allele, '-' for deletion. 41 | -------------------------------------------------------------------------------- /test/fmt/dbSNP_hg19validation.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [format description] 8 | description=validation information from dbSNP. the input columns are: chr, pos. If the primary reference genome for the project is hg18 and it must have an alternative reference hg19. 9 | position=chr, %(pos)s 10 | variant_info=%(var_info)s 11 | 12 | [DEFAULT] 13 | pos=alt_pos 14 | pos_comment=Field for position. To export indel, set it to 'pos-length(upstream)' 15 | 16 | var_info=mut_type_dbSNP, validation 17 | var_info_comment=Variant information fields to be imported. 
18 | 19 | [chr] 20 | index=1 21 | type=VARCHAR(20) 22 | adj=RemoveLeading('chr') 23 | comment=Chromosome 24 | 25 | [alt_pos] 26 | index=2 27 | type=INTEGER NOT NULL 28 | comment=1-based position, hg19 29 | 30 | [mut_type_dbSNP] 31 | index=3 32 | type=VARCHAR(255) 33 | comment=Functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.) 34 | 35 | [validation] 36 | index=4 37 | type=VARCHAR(255) 38 | comment=validation status, can be unknown, by-cluster, by-frequency, by-submitter, by-2hit-2allele, by-hapmap, and by-1000genomes 39 | -------------------------------------------------------------------------------- /test/fmt/genotypes.fmt: -------------------------------------------------------------------------------- 1 | [format description] 2 | description=Input format for variants with multiple sample genotypes. The input file genotypes.txt is prepared by transposing and combining fields from some Hapmap data file in PED/MAP format. 3 | variant=chr,%(pos)s,%(ref)s,%(alt)s 4 | genotype=%(geno)s 5 | variant_info=%(var_info)s 6 | genotype_info=%(geno_info)s 7 | 8 | [DEFAULT] 9 | pos=pos 10 | pos_comment=Field for position. To export indel, set it to 'pos-length(upstream)' 11 | 12 | ref=ref 13 | ref_comment=Field for reference allele. 14 | 15 | alt=alt 16 | alt_comment=Field for alternative allele. 17 | 18 | geno=GT 19 | geno_comment=Field to extract genotype from .vcf file. You can set it to 20 | safe_GT if genotype is not the first field in the genotype columns of your .vcf file. 21 | 22 | var_info=snp_id, genet_dist 23 | var_info_comment=Variant information fields to be imported. 
24 | 25 | geno_info= 26 | geno_info_comment=Optional genotype info 27 | 28 | [chr] 29 | index=1 30 | type=VARCHAR(20) 31 | adj=RemoveLeading('chr') 32 | comment=Chromosome 33 | 34 | [snp_id] 35 | index=2 36 | type=VARCHAR(255) 37 | comment=rs number or snp identifier 38 | 39 | [genet_dist] 40 | index=3 41 | type=INTEGER 42 | comment=Genetic distance (morgans) 43 | 44 | [pos] 45 | index=4 46 | type=INTEGER NOT NULL 47 | comment=1-based Position of the snp 48 | 49 | [ref] 50 | index=5 51 | type=VARCHAR(255) 52 | comment=Reference allele 53 | 54 | [alt] 55 | index=6 56 | type=VARCHAR(255) 57 | comment=Alternative allele 58 | 59 | [GT] 60 | index=7: 61 | type=INTEGER 62 | adj=Nullify(['.', '0']) 63 | comment=Genotype coded as 1 (ref alt) and 2 (alt alt) 64 | -------------------------------------------------------------------------------- /test/fmt/randcol.fmt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org) 2 | # Distributed under GPL. see 3 | # 4 | # Please refer to http://varianttools.sourceforge.net/Format/New for 5 | # a description of the format of this file. 6 | 7 | [format description] 8 | description=A basic variant input format with four columns: chr, pos, ref, alt. 9 | variant=chr,pos,ref,alt 10 | variant_info=%(var_info)s 11 | 12 | [DEFAULT] 13 | chr_col=1 14 | chr_col_comment=Column index for the chromosome field 15 | 16 | pos_col=2 17 | pos_col_comment=Column index for the position field 18 | 19 | ref_col=3 20 | ref_col_comment=Column index for the reference field 21 | 22 | alt_col=4 23 | alt_col_comment=Column index for the alternative field 24 | 25 | pos_adj=0 26 | pos_adj_comment=Set to 1 if the input position is zero-based. 
27 | 28 | var_info= 29 | 30 | 31 | [chr] 32 | index=%(chr_col)s 33 | type=VARCHAR(20) 34 | adj=RemoveLeading('chr') 35 | comment=Chromosome 36 | 37 | [pos] 38 | index=%(pos_col)s 39 | adj=IncreaseBy(%(pos_adj)s) 40 | type=INTEGER NOT NULL 41 | comment=1-based position 42 | 43 | [ref] 44 | index=%(ref_col)s 45 | type=VARCHAR(255) 46 | comment=Reference allele, '-' for insertion. 47 | 48 | [alt] 49 | index=%(alt_col)s 50 | type=VARCHAR(255) 51 | comment=Alternative allele, '-' for deletion. 52 | 53 | [grpby] 54 | index=5 55 | type=VARCHAR(255) 56 | comment=group names 57 | -------------------------------------------------------------------------------- /test/output/assogrp1.txt: -------------------------------------------------------------------------------- 1 | chr pos ref alt group 2 | 1 742429 G A 2 3 | 1 742456 T G 0 4 | 1 742584 A G 1 5 | 1 743268 C A 2 6 | 1 743288 T C 2 7 | 1 743404 G A 1 8 | 1 743712 G T 0 9 | 1 744074 G A 0 10 | 1 744197 T C 1 11 | 1 744366 G A 2 12 | 22 49522492 A G 0 13 | 22 49522870 G C 1 14 | 22 49523030 T C 1 15 | 22 49524123 C T 2 16 | 22 49524956 G A 2 17 | 22 49525473 G C 0 18 | 22 49525866 C A 2 19 | 22 49529883 C T 2 20 | 22 49532714 G A 2 21 | 22 49533094 C T 2 22 | 22 49533142 T C 2 23 | 22 49534358 A G 1 24 | 22 49534570 T C 2 25 | 22 49534642 T C 2 26 | 22 49534747 G C 2 27 | 22 49534764 T C 1 28 | 22 49534781 C T 1 29 | -------------------------------------------------------------------------------- /test/output/assogrp2.txt: -------------------------------------------------------------------------------- 1 | chr pos ref alt group 2 | 1 742429 G A 4 3 | 1 742456 T G 0 4 | 1 742584 A G 3 5 | 1 743268 C A 4 6 | 1 743288 T C 0 7 | 1 743404 G A 1 8 | 1 743712 G T 1 9 | 1 744074 G A 2 10 | 1 744197 T C 4 11 | 1 744366 G A 3 12 | 22 49522492 A G 3 13 | 22 49522870 G C 0 14 | 22 49523030 T C 2 15 | 22 49524123 C T 2 16 | 22 49524956 G A 2 17 | 22 49525473 G C 4 18 | 22 49525866 C A 3 19 | 22 49529883 C T 3 20 | 22 49532714 G A 2 21 | 
22 49533094 C T 3 22 | 22 49533142 T C 1 23 | 22 49534358 A G 1 24 | 22 49534570 T C 1 25 | 22 49534642 T C 1 26 | 22 49534747 G C 4 27 | 22 49534764 T C 3 28 | 22 49534781 C T 4 29 | -------------------------------------------------------------------------------- /test/output/assogrp3.txt: -------------------------------------------------------------------------------- 1 | chr pos ref alt group 2 | 1 742429 G A 7 3 | 1 742456 T G 0 4 | 1 742584 A G 6 5 | 1 743268 C A 5 6 | 1 743288 T C 2 7 | 1 743404 G A 5 8 | 1 743712 G T 8 9 | 1 744074 G A 7 10 | 1 744197 T C 3 11 | 1 744366 G A 4 12 | 22 49522492 A G 0 13 | 22 49522870 G C 4 14 | 22 49523030 T C 8 15 | 22 49524123 C T 5 16 | 22 49524956 G A 3 17 | 22 49525473 G C 5 18 | 22 49525866 C A 7 19 | 22 49529883 C T 1 20 | 22 49532714 G A 3 21 | 22 49533094 C T 5 22 | 22 49533142 T C 0 23 | 22 49534358 A G 0 24 | 22 49534570 T C 2 25 | 22 49534642 T C 5 26 | 22 49534747 G C 4 27 | 22 49534764 T C 8 28 | 22 49534781 C T 2 29 | -------------------------------------------------------------------------------- /test/output/assogrp4.txt: -------------------------------------------------------------------------------- 1 | chr pos ref alt group 2 | 1 742429 G A 0 3 | 1 742456 T G 4 4 | 1 742584 A G 8 5 | 1 743268 C A 0 6 | 1 743288 T C 10 7 | 1 743404 G A 5 8 | 1 743712 G T 5 9 | 1 744074 G A 11 10 | 1 744197 T C 2 11 | 1 744366 G A 2 12 | 22 49522492 A G 8 13 | 22 49522870 G C 4 14 | 22 49523030 T C 1 15 | 22 49524123 C T 2 16 | 22 49524956 G A 10 17 | 22 49525473 G C 10 18 | 22 49525866 C A 1 19 | 22 49529883 C T 3 20 | 22 49532714 G A 11 21 | 22 49533094 C T 10 22 | 22 49533142 T C 10 23 | 22 49534358 A G 6 24 | 22 49534570 T C 0 25 | 22 49534642 T C 10 26 | 22 49534747 G C 4 27 | 22 49534764 T C 9 28 | 22 49534781 C T 3 29 | -------------------------------------------------------------------------------- /test/output/assogrp5.txt: -------------------------------------------------------------------------------- 1 | chr pos 
ref alt group 2 | 1 742429 G A 6 3 | 1 742456 T G 11 4 | 1 742584 A G 7 5 | 1 743268 C A 8 6 | 1 743288 T C 6 7 | 1 743404 G A 12 8 | 1 743712 G T 9 9 | 1 744074 G A 6 10 | 1 744197 T C 12 11 | 1 744366 G A 7 12 | 22 49522492 A G 14 13 | 22 49522870 G C 1 14 | 22 49523030 T C 6 15 | 22 49524123 C T 9 16 | 22 49524956 G A 12 17 | 22 49525473 G C 13 18 | 22 49525866 C A 14 19 | 22 49529883 C T 12 20 | 22 49532714 G A 6 21 | 22 49533094 C T 12 22 | 22 49533142 T C 5 23 | 22 49534358 A G 8 24 | 22 49534570 T C 5 25 | 22 49534642 T C 2 26 | 22 49534747 G C 14 27 | 22 49534764 T C 5 28 | 22 49534781 C T 6 29 | -------------------------------------------------------------------------------- /test/output/assogrp6.txt: -------------------------------------------------------------------------------- 1 | chr pos ref alt group 2 | 1 742429 G A 15 3 | 1 742456 T G 9 4 | 1 742584 A G 13 5 | 1 743268 C A 12 6 | 1 743288 T C 5 7 | 1 743404 G A 14 8 | 1 743712 G T 4 9 | 1 744074 G A 15 10 | 1 744197 T C 12 11 | 1 744366 G A 14 12 | 22 49522492 A G 14 13 | 22 49522870 G C 17 14 | 22 49523030 T C 2 15 | 22 49524123 C T 2 16 | 22 49524956 G A 0 17 | 22 49525473 G C 0 18 | 22 49525866 C A 9 19 | 22 49529883 C T 14 20 | 22 49532714 G A 5 21 | 22 49533094 C T 11 22 | 22 49533142 T C 7 23 | 22 49534358 A G 5 24 | 22 49534570 T C 13 25 | 22 49534642 T C 19 26 | 22 49534747 G C 14 27 | 22 49534764 T C 16 28 | 22 49534781 C T 2 29 | -------------------------------------------------------------------------------- /test/output/assogrp7.txt: -------------------------------------------------------------------------------- 1 | chr pos ref alt group 2 | 1 742429 G A 1 3 | 1 742456 T G 3 4 | 1 742584 A G 2 5 | 1 743268 C A 22 6 | 1 743288 T C 5 7 | 1 743404 G A 15 8 | 1 743712 G T 1 9 | 1 744074 G A 21 10 | 1 744197 T C 15 11 | 1 744366 G A 21 12 | 22 49522492 A G 3 13 | 22 49522870 G C 2 14 | 22 49523030 T C 9 15 | 22 49524123 C T 24 16 | 22 49524956 G A 22 17 | 22 49525473 G C 21 18 | 22 49525866 
C A 21 19 | 22 49529883 C T 0 20 | 22 49532714 G A 4 21 | 22 49533094 C T 13 22 | 22 49533142 T C 0 23 | 22 49534358 A G 4 24 | 22 49534570 T C 15 25 | 22 49534642 T C 7 26 | 22 49534747 G C 15 27 | 22 49534764 T C 3 28 | 22 49534781 C T 5 29 | -------------------------------------------------------------------------------- /test/output/assogrp8.txt: -------------------------------------------------------------------------------- 1 | chr pos ref alt group 2 | 1 742429 G A 29 3 | 1 742456 T G 0 4 | 1 742584 A G 14 5 | 1 743268 C A 12 6 | 1 743288 T C 9 7 | 1 743404 G A 27 8 | 1 743712 G T 28 9 | 1 744074 G A 2 10 | 1 744197 T C 6 11 | 1 744366 G A 24 12 | 22 49522492 A G 0 13 | 22 49522870 G C 21 14 | 22 49523030 T C 1 15 | 22 49524123 C T 16 16 | 22 49524956 G A 29 17 | 22 49525473 G C 12 18 | 22 49525866 C A 23 19 | 22 49529883 C T 18 20 | 22 49532714 G A 4 21 | 22 49533094 C T 26 22 | 22 49533142 T C 22 23 | 22 49534358 A G 17 24 | 22 49534570 T C 19 25 | 22 49534642 T C 13 26 | 22 49534747 G C 22 27 | 22 49534764 T C 27 28 | 22 49534781 C T 4 29 | -------------------------------------------------------------------------------- /test/output/assores1.txt: -------------------------------------------------------------------------------- 1 | 0 0.430306 0.46442 0.731729 2 | 1 0.0828568 0.381475 0.875376 3 | 2 -0.138939 0.274634 -1.09275 4 | -------------------------------------------------------------------------------- /test/output/assores2.txt: -------------------------------------------------------------------------------- 1 | 0 0.0669885 0.70313 0.381154 2 | 1 0.285605 0.352182 0.930582 3 | 3 -0.00624929 0.967045 -0.0413205 4 | 4 -0.0335427 0.750033 -0.31864 5 | -------------------------------------------------------------------------------- /test/output/assores3.txt: -------------------------------------------------------------------------------- 1 | 0 0.430306 0.46442 0.731729 2 | 1 -0.0609057 0.800118 -0.253228 3 | 2 0.152727 0.251324 1.14747 4 | 3 -0.259238 
0.609967 -0.510201 5 | 4 -0.11057 0.361796 -0.912157 6 | 5 0.438295 0.334969 0.964387 7 | 8 -0.0218357 0.916581 -0.104755 8 | -------------------------------------------------------------------------------- /test/output/assores4.txt: -------------------------------------------------------------------------------- 1 | 0 0.156852 0.705634 0.37778 2 | 2 0.0479459 0.908071 0.115486 3 | 3 0.0962765 0.423544 0.800452 4 | 4 -0.109615 0.362665 -0.910508 5 | 5 0.438295 0.334969 0.964387 6 | 9 -0.0218357 0.916581 -0.104755 7 | -------------------------------------------------------------------------------- /test/output/assores5.txt: -------------------------------------------------------------------------------- 1 | 1 0.0313213 0.86489 0.170175 2 | 11 0.430306 0.46442 0.731729 3 | 12 0.00248295 0.98993 0.0126225 4 | 14 -0.255987 0.114477 -1.57908 5 | 5 0.0142086 0.939371 0.0760707 6 | 6 0.15144 0.280018 1.08057 7 | 7 0.662234 0.35682 0.921654 8 | -------------------------------------------------------------------------------- /test/output/assores6.txt: -------------------------------------------------------------------------------- 1 | 12 -0.259238 0.609967 -0.510201 2 | 13 0.156852 0.705634 0.37778 3 | 14 -0.121077 0.344582 -0.945374 4 | 16 -0.0218357 0.916581 -0.104755 5 | 17 0.0313213 0.86489 0.170175 6 | 2 0.15144 0.280018 1.08057 7 | 9 0.430306 0.46442 0.731729 8 | -------------------------------------------------------------------------------- /test/output/assores7.txt: -------------------------------------------------------------------------------- 1 | 0 -0.0609057 0.800118 -0.253228 2 | 15 -0.148412 0.285006 -1.06943 3 | 2 0.0313213 0.86489 0.170175 4 | 21 0.662234 0.35682 0.921654 5 | 3 0.0286987 0.88404 0.145869 6 | 5 0.15144 0.280018 1.08057 7 | -------------------------------------------------------------------------------- /test/output/assores8.txt: -------------------------------------------------------------------------------- 1 | 0 0.430306 0.46442 0.731729 2 
| 18 -0.0609057 0.800118 -0.253228 3 | 19 0.156852 0.705634 0.37778 4 | 21 0.0313213 0.86489 0.170175 5 | 22 -0.255987 0.114477 -1.57908 6 | 24 0.662234 0.35682 0.921654 7 | 27 0.0582982 0.758744 0.307173 8 | 4 0.15144 0.280018 1.08057 9 | 6 -0.259238 0.609967 -0.510201 10 | -------------------------------------------------------------------------------- /test/output/assores_wss1.txt: -------------------------------------------------------------------------------- 1 | 0 0.745032 0.46442 0.731729 2 | 1 0.118456 0.814949 0.234077 3 | 2 -0.136058 0.791039 -0.264997 4 | -------------------------------------------------------------------------------- /test/output/assores_wss2.txt: -------------------------------------------------------------------------------- 1 | 0 0.461022 0.522973 0.638882 2 | 1 0.683604 0.342004 0.950439 3 | 3 0.173439 0.766826 0.29657 4 | 4 -1.06808 0.139063 -1.47988 5 | -------------------------------------------------------------------------------- /test/output/assores_wss3.txt: -------------------------------------------------------------------------------- 1 | 0 0.745032 0.46442 0.731729 2 | 1 -0.257819 0.800118 -0.253228 3 | 2 0.38392 0.705634 0.37778 4 | 3 -0.518217 0.609967 -0.510201 5 | 4 -0.166461 0.779178 -0.280429 6 | 5 0.979445 0.334969 0.964387 7 | 8 -0.164246 0.874862 -0.157506 8 | -------------------------------------------------------------------------------- /test/output/assores_wss4.txt: -------------------------------------------------------------------------------- 1 | 0 0.38392 0.705634 0.37778 2 | 2 0.20908 0.771191 0.290857 3 | 3 -0.257819 0.800118 -0.253228 4 | 4 -0.233108 0.694741 -0.392486 5 | 5 0.979445 0.334969 0.964387 6 | 9 -0.164246 0.874862 -0.157506 7 | -------------------------------------------------------------------------------- /test/output/assores_wss5.txt: -------------------------------------------------------------------------------- 1 | 1 0.173713 0.86489 0.170175 2 | 11 0.745032 0.46442 0.731729 3 | 12 
0.0687264 0.907108 0.116701 4 | 14 -1.61089 0.114477 -1.57908 5 | 5 0.117689 0.871988 0.161155 6 | 6 0 1 0 7 | 7 0.936306 0.35682 0.921654 8 | -------------------------------------------------------------------------------- /test/output/assores_wss6.txt: -------------------------------------------------------------------------------- 1 | 12 -0.518217 0.609967 -0.510201 2 | 13 0.38392 0.705634 0.37778 3 | 14 0.0161445 0.974908 0.0314572 4 | 16 -0.164246 0.874862 -0.157506 5 | 17 0.173713 0.86489 0.170175 6 | 2 0 1 0 7 | 9 0.745032 0.46442 0.731729 8 | -------------------------------------------------------------------------------- /test/output/assores_wss7.txt: -------------------------------------------------------------------------------- 1 | 0 -0.257819 0.800118 -0.253228 2 | 15 -0.190776 0.709542 -0.372525 3 | 2 0.173713 0.86489 0.170175 4 | 21 0.936306 0.35682 0.921654 5 | 3 0.302467 0.6787 0.414299 6 | 5 0 1 0 7 | -------------------------------------------------------------------------------- /test/output/assores_wss8.txt: -------------------------------------------------------------------------------- 1 | 0 0.745032 0.46442 0.731729 2 | 18 -0.257819 0.800118 -0.253228 3 | 19 0.38392 0.705634 0.37778 4 | 21 0.173713 0.86489 0.170175 5 | 22 -1.61089 0.114477 -1.57908 6 | 24 0.936306 0.35682 0.921654 7 | 27 0.424429 0.560597 0.582052 8 | 4 0 1 0 9 | 6 -0.518217 0.609967 -0.510201 10 | -------------------------------------------------------------------------------- /test/output/evsVariantTest.txt: -------------------------------------------------------------------------------- 1 | id ref alt DP MQ ANNO SVM 2 | 1 G A 472 28 nonsynonymous:OR4F5:NM_001005484:exon1:c.G26A:p.G9D, -1.4352462 3 | 2 A G 602 28 nonsynonymous:OR4F5:NM_001005484:exon1:c.A44G:p.E15G, -1.2097349 4 | 3 A G 4094 31 synonymous:OR4F5:NM_001005484:exon1:c.A180G:p.S60S, -0.9014406 5 | 4 T G 525238 33 nonsynonymous:OR4F5:NM_001005484:exon1:c.T338G:p.F113C, -0.85266604 6 | 5 G C 48194 58 
intronic:COL16A1 0.053230746 7 | 6 G T 48194 58 intronic:COL16A1 0.053230746 8 | -------------------------------------------------------------------------------- /test/output/exclude_anno1.txt: -------------------------------------------------------------------------------- 1 | 761732 2 | 761752 3 | 761800 4 | 761811 5 | 762589 6 | 762592 7 | -------------------------------------------------------------------------------- /test/output/exclude_sift.txt: -------------------------------------------------------------------------------- 1 | 619 2 | -------------------------------------------------------------------------------- /test/output/genotype_variant_sample_output.txt: -------------------------------------------------------------------------------- 1 | 3 0 3 2 | 7 0 7 3 | 7 0 7 4 | 4 0 4 5 | 4 0 4 6 | 5 0 5 7 | 4 0 4 8 | 6 0 6 9 | 3 0 3 10 | 4 0 4 11 | 7 0 7 12 | 8 0 8 13 | 8 0 8 14 | 8 0 8 15 | 5 0 5 16 | 6 0 6 17 | 9 0 9 18 | 8 0 8 19 | 9 0 9 20 | 5 0 5 21 | 9 0 9 22 | 8 0 8 23 | 7 0 7 24 | 3 0 3 25 | 7 0 7 26 | 5 0 5 27 | 7 0 7 28 | 5 0 5 29 | 7 0 7 30 | 4 0 4 31 | 6 0 6 32 | 4 0 4 33 | 5 0 5 34 | 7 0 7 35 | 3 0 3 36 | 3 0 3 37 | 6 0 6 38 | 6 0 6 39 | 5 0 5 40 | 5 0 5 41 | 5 0 5 42 | 4 0 4 43 | 7 0 7 44 | 5 0 5 45 | 6 0 6 46 | 9 0 9 47 | 6 0 6 48 | 2 0 2 49 | 9 0 9 50 | -------------------------------------------------------------------------------- /test/output/import_cga.txt: -------------------------------------------------------------------------------- 1 | 1 38907 C T 2 | 1 41981 A G 3 | 1 46670 A G 4 | 1 47108 G C 5 | 1 47292 T G 6 | 1 49272 G A 7 | 1 49291 C T 8 | 1 49342 G T 9 | 1 49363 C T 10 | 1 51476 T C 11 | 1 51673 T C 12 | 1 52238 T G 13 | 1 52727 C G 14 | 1 53206 G C 15 | 1 54043 C T 16 | 1 54586 T C 17 | 1 54676 C T 18 | 1 55164 C A 19 | 1 55381 G A 20 | 1 55394 T A 21 | 1 55545 C T 22 | 1 55550 A T 23 | 1 55816 G A 24 | 1 55850 C G 25 | 1 55926 T C 26 | 1 55976 T C 27 | 1 56485 C T 28 | 1 56638 C T 29 | 1 56799 T C 30 | 1 57246 C G 31 | 1 
57376 C T 32 | 1 57952 A C 33 | 1 57999 G T 34 | 1 58211 A G 35 | 1 58349 A G 36 | 1 58812 C T 37 | 1 59051 A G 38 | 1 59276 C T 39 | 1 59498 T C 40 | 1 60273 T C 41 | 1 60408 C T 42 | 1 60726 C A 43 | 1 60791 A G 44 | 1 61290 - G 45 | 1 61442 A G 46 | 1 61480 G C 47 | 1 61499 G A 48 | 1 62180 T G 49 | 1 62190 A C 50 | 1 62203 T C 51 | 1 62298 - CTTC 52 | 1 62578 G A 53 | 1 63074 A C 54 | 1 63792 G T 55 | 1 64125 C T 56 | 1 64976 C T 57 | 1 66008 C G 58 | 1 66131 C G 59 | 1 67184 G A 60 | 1 67223 C A 61 | 1 67242 A C 62 | 1 67605 T C 63 | 1 68306 C T 64 | 1 68316 T C 65 | 1 69511 A G 66 | 1 69552 G C 67 | 1 69569 T C 68 | 1 72787 C T 69 | 1 76846 T A 70 | 1 77110 A G 71 | 1 78035 G A 72 | 1 79078 G C 73 | 1 79202 G A 74 | 1 80141 A G 75 | 1 80443 AACAA - 76 | 1 81100 T G 77 | 1 81204 T C 78 | 1 81374 T C 79 | 1 81437 G A 80 | 1 82734 T C 81 | 1 85150 G A 82 | 1 87683 T C 83 | 1 87702 G A 84 | 1 87805 T C 85 | 1 87959 T C 86 | 1 88169 C T 87 | 1 88172 G A 88 | 1 88265 C T 89 | 1 88295 T A 90 | 1 88463 A G 91 | 1 91581 G A 92 | 1 92638 A T 93 | 1 92654 T C 94 | 1 107332 T C 95 | 1 243851 AAGT - 96 | -------------------------------------------------------------------------------- /test/output/import_cga_phenotype.txt: -------------------------------------------------------------------------------- 1 | sample_name filename num_genotypes sample_genotype_fields 2 | samp_csv txt/CGA.tsv.bz2 95 GT,allele1VarScoreVAF,allele2VarScoreVAF,allele1VarScoreEAF,allele2VarScoreEAF 3 | -------------------------------------------------------------------------------- /test/output/import_genotype_1.txt: -------------------------------------------------------------------------------- 1 | 1 rs2843403 0 2518957 C T 2 | 1 rs4648462 0 3155127 A C 3 | 1 rs7410846 0 3926588 G A 4 | 1 rs1490413 0 4267183 G A 5 | 1 rs1878052 0 4452662 G A 6 | 1 rs2071999 0 4673126 A C 7 | 1 rs10915297 0 4910002 T C 8 | 1 rs521430 0 5206936 C T 9 | 1 rs1935759 0 5526603 T A 10 | 1 rs548726 0 5836208 C T 11 | 1 
rs6680884 0 6190958 G A 12 | 1 rs277686 0 6670484 C G 13 | 1 rs2071917 0 7073114 G A 14 | 1 rs1750838 0 7441851 A G 15 | 1 rs228688 0 7801717 G T 16 | -------------------------------------------------------------------------------- /test/output/import_genotype_2.txt: -------------------------------------------------------------------------------- 1 | sample_name filename num_genotypes sample_genotype_fields 2 | V1 vcf/V1.vcf 989 GT 3 | DUP vcf/dup_geno.vcf 989 GT 4 | -------------------------------------------------------------------------------- /test/output/import_mpi_multi_genotypes.txt: -------------------------------------------------------------------------------- 1 | sample_name filename num_genotypes sample_genotype_fields 2 | SAMP1 vcf/V1.vcf 989 GT 3 | SAMP1 vcf/V2.vcf 990 GT 4 | SAMP1 vcf/V3.vcf 988 GT 5 | -------------------------------------------------------------------------------- /test/output/import_mpi_multi_samples.txt: -------------------------------------------------------------------------------- 1 | sample_name filename 2 | SAMP1 vcf/V1.vcf 3 | SAMP1 vcf/V2.vcf 4 | SAMP1 vcf/V3.vcf 5 | -------------------------------------------------------------------------------- /test/output/import_mpi_multi_variant.txt: -------------------------------------------------------------------------------- 1 | Name: variant 2 | Description: Master variant table 3 | Creation date: Jan14 4 | Command: 5 | Fields: variant_id, bin, chr, pos, ref, alt 6 | Number of variants: 1611 7 | -------------------------------------------------------------------------------- /test/output/import_mpi_samples.txt: -------------------------------------------------------------------------------- 1 | sample_name filename 2 | NA06985 vcf/CEU.vcf.gz 3 | NA06986 vcf/CEU.vcf.gz 4 | NA06994 vcf/CEU.vcf.gz 5 | NA07000 vcf/CEU.vcf.gz 6 | NA07037 vcf/CEU.vcf.gz 7 | NA07051 vcf/CEU.vcf.gz 8 | NA07346 vcf/CEU.vcf.gz 9 | NA07347 vcf/CEU.vcf.gz 10 | NA07357 vcf/CEU.vcf.gz 11 | NA10847 
vcf/CEU.vcf.gz 12 | NA10851 vcf/CEU.vcf.gz 13 | NA11829 vcf/CEU.vcf.gz 14 | NA11830 vcf/CEU.vcf.gz 15 | NA11831 vcf/CEU.vcf.gz 16 | NA11832 vcf/CEU.vcf.gz 17 | NA11840 vcf/CEU.vcf.gz 18 | NA11881 vcf/CEU.vcf.gz 19 | NA11894 vcf/CEU.vcf.gz 20 | NA11918 vcf/CEU.vcf.gz 21 | NA11919 vcf/CEU.vcf.gz 22 | NA11920 vcf/CEU.vcf.gz 23 | NA11931 vcf/CEU.vcf.gz 24 | NA11992 vcf/CEU.vcf.gz 25 | NA11993 vcf/CEU.vcf.gz 26 | NA11994 vcf/CEU.vcf.gz 27 | NA11995 vcf/CEU.vcf.gz 28 | NA12003 vcf/CEU.vcf.gz 29 | NA12004 vcf/CEU.vcf.gz 30 | NA12005 vcf/CEU.vcf.gz 31 | NA12006 vcf/CEU.vcf.gz 32 | NA12043 vcf/CEU.vcf.gz 33 | NA12044 vcf/CEU.vcf.gz 34 | NA12045 vcf/CEU.vcf.gz 35 | NA12144 vcf/CEU.vcf.gz 36 | NA12154 vcf/CEU.vcf.gz 37 | NA12155 vcf/CEU.vcf.gz 38 | NA12156 vcf/CEU.vcf.gz 39 | NA12234 vcf/CEU.vcf.gz 40 | NA12249 vcf/CEU.vcf.gz 41 | NA12287 vcf/CEU.vcf.gz 42 | NA12414 vcf/CEU.vcf.gz 43 | NA12489 vcf/CEU.vcf.gz 44 | NA12716 vcf/CEU.vcf.gz 45 | NA12717 vcf/CEU.vcf.gz 46 | NA12749 vcf/CEU.vcf.gz 47 | NA12750 vcf/CEU.vcf.gz 48 | NA12751 vcf/CEU.vcf.gz 49 | NA12760 vcf/CEU.vcf.gz 50 | NA12761 vcf/CEU.vcf.gz 51 | NA12762 vcf/CEU.vcf.gz 52 | NA12763 vcf/CEU.vcf.gz 53 | NA12776 vcf/CEU.vcf.gz 54 | NA12812 vcf/CEU.vcf.gz 55 | NA12813 vcf/CEU.vcf.gz 56 | NA12814 vcf/CEU.vcf.gz 57 | NA12815 vcf/CEU.vcf.gz 58 | NA12828 vcf/CEU.vcf.gz 59 | NA12872 vcf/CEU.vcf.gz 60 | NA12873 vcf/CEU.vcf.gz 61 | NA12874 vcf/CEU.vcf.gz 62 | -------------------------------------------------------------------------------- /test/output/import_mpi_variant.txt: -------------------------------------------------------------------------------- 1 | Name: variant 2 | Description: Master variant table 3 | Creation date: Jan14 4 | Command: 5 | Fields: variant_id, bin, chr, pos, ref, alt 6 | Number of variants: 288 7 | -------------------------------------------------------------------------------- /test/output/import_multi_sample2_samples.txt: 
-------------------------------------------------------------------------------- 1 | sample_name filename 2 | SMP1 txt/sample_1_chr22.txt 3 | SMP2 txt/sample_1_chr22.txt 4 | SMP3 txt/sample_1_chr22.txt 5 | -------------------------------------------------------------------------------- /test/output/import_multi_sample2_samples_hdf5.txt: -------------------------------------------------------------------------------- 1 | sample_name filename 2 | SMP1 txt/sample_1_chr22.txt 3 | SMP2 txt/sample_1_chr22.txt 4 | SMP3 txt/sample_1_chr22.txt 5 | -------------------------------------------------------------------------------- /test/output/import_multi_sample2_variant.txt: -------------------------------------------------------------------------------- 1 | Name: variant 2 | Description: Master variant table 3 | Creation date: Jan13 4 | Command: 5 | Fields: variant_id, bin, chr, pos, ref, alt 6 | Number of variants: 9 7 | -------------------------------------------------------------------------------- /test/output/import_multi_sample_samples.txt: -------------------------------------------------------------------------------- 1 | sample_name filename 2 | SMP1 txt/sample_chr22.txt 3 | SMP2 txt/sample_chr22.txt 4 | SMP3 txt/sample_chr22.txt 5 | -------------------------------------------------------------------------------- /test/output/import_multi_sample_samples_hdf5.txt: -------------------------------------------------------------------------------- 1 | sample_name filename 2 | SMP1 txt/sample_chr22.txt 3 | SMP2 txt/sample_chr22.txt 4 | SMP3 txt/sample_chr22.txt 5 | -------------------------------------------------------------------------------- /test/output/import_multi_sample_variant.txt: -------------------------------------------------------------------------------- 1 | Name: variant 2 | Description: Master variant table 3 | Creation date: Jan13 4 | Command: 5 | Fields: variant_id, bin, chr, pos, ref, alt 6 | Number of variants: 6 7 | 
-------------------------------------------------------------------------------- /test/output/import_vcf_ref.txt: -------------------------------------------------------------------------------- 1 | 1 10434 - C 2 | 1 54790 - T 3 | 1 81963 - AA 4 | 1 82134 - AAAAAAAAAAAAAA 5 | 1 83787 - A 6 | 1 83873 - AG 7 | 1 83932 - A 8 | 1 83936 - AAA 9 | 1 87277 - T 10 | 1 91552 - T 11 | 1 120987 - TA 12 | 1 120996 - TAT 13 | 1 121018 - TATC 14 | 1 121047 - TAATAT 15 | 1 121051 - ATC 16 | 1 121063 - AACA 17 | 1 121065 - TTG 18 | 1 121070 - C 19 | 1 121073 - TAC 20 | 1 121083 - TATCT 21 | 1 241160 - TTC 22 | 1 718787 - T 23 | 1 723805 - A 24 | 1 724138 - AATGG 25 | 1 724138 - AATGGAATGGAATGG 26 | 1 724189 - ATGGAATGGG 27 | 1 724499 - C 28 | 1 726945 - GAATG 29 | 1 746215 - AAC 30 | 1 749964 - AA 31 | 1 750063 - G 32 | 1 753842 - C 33 | 1 761958 - T 34 | 1 768117 - GTTTT 35 | 1 768118 - TT 36 | 1 768625 - A 37 | 1 770426 - TCCCTCTCCCTTGCCTCCCTCCCCATCCATCTGCCCATCCCTCCATCCACCTCTTCATCTCTCCTTTCCTCCC 38 | 1 773870 - A 39 | 1 774008 - AGC 40 | 1 774884 - GACACACACACCTAGACACACACACCTGGACACACACACGTA 41 | 1 775257 - AAAG 42 | 1 778303 - CT 43 | 1 778303 - CCT 44 | 1 779912 - T 45 | 1 782959 - G 46 | 1 784987 - T 47 | 1 787071 - G 48 | 1 790697 - AT 49 | 1 790697 - TA 50 | 1 791108 - A 51 | 1 791130 - A 52 | 1 791759 - A 53 | 1 795531 - G 54 | 1 795559 - TTTTTT 55 | 1 796473 - C 56 | 1 797131 - TAA 57 | 1 800617 - C 58 | 1 801996 - TGGTCCTCCCTCTGCACTCACATCCCTGACGTCCTCCCGAGCCCTCACA 59 | 1 802232 - GCCCTCACGTGGTCCTCCCCCTGCACTCACATCCCTGACGTCCTCCCGAGCCCTCACATGGTCCTCCCCCTGCACTCACATCCCTGACATCCTCCCGT 60 | 1 803936 - A 61 | 1 804256 - C 62 | 1 804328 - C 63 | 1 807303 - T 64 | 1 807303 - TT 65 | 1 813464 - C 66 | 1 814375 - TTG 67 | 1 817761 - TAT 68 | 1 818003 - C 69 | 1 818346 - T 70 | 1 818563 - A 71 | 1 819027 - T 72 | 1 819517 - T 73 | 1 819702 - GTCTATGT 74 | -------------------------------------------------------------------------------- /test/output/missing_gen.tped: 
-------------------------------------------------------------------------------- 1 | 1 . . 69116 0 0 0 0 0 0 0 0 2 | 1 . . 69134 0 0 0 0 0 0 0 0 3 | 1 . . 69270 0 0 0 0 0 0 0 0 4 | 1 . . 69428 T T T T T T 0 0 5 | -------------------------------------------------------------------------------- /test/output/remove_field_after.txt: -------------------------------------------------------------------------------- 1 | Name: variant 2 | Description: Master variant table 3 | Creation date: Jan18 4 | Command: 5 | Fields: variant_id, bin, chr, pos, ref, alt 6 | Number of variants: 2144 7 | -------------------------------------------------------------------------------- /test/output/remove_field_before.txt: -------------------------------------------------------------------------------- 1 | Name: variant 2 | Description: Master variant table 3 | Creation date: Jan18 4 | Command: 5 | Fields: variant_id, bin, chr, pos, ref, alt, CEU_cases_num, 6 | DP, gene_name 7 | Number of variants: 2144 8 | -------------------------------------------------------------------------------- /test/output/update_sum_stat.txt: -------------------------------------------------------------------------------- 1 | . . . 2 | . . . 3 | . . . 
4 | 100 15 69.3333 5 | 6 3 4.0000 6 | 4 3 3.3333 7 | -------------------------------------------------------------------------------- /test/output/use_field.txt: -------------------------------------------------------------------------------- 1 | 9468354 - A 1 2 | -------------------------------------------------------------------------------- /test/output/use_position.txt: -------------------------------------------------------------------------------- 1 | 9468354 - A 1 2 | -------------------------------------------------------------------------------- /test/output/vcf_assigned_sample_name_genotype.txt: -------------------------------------------------------------------------------- 1 | sample_name filename num_genotypes sample_genotype_fields 2 | samp_vcf1 vcf/SAMP1.vcf 289 GT 3 | samp_vcf2 vcf/SAMP2.vcf 288 GT 4 | samp_vcf3 vcf/SAMP...x_variants.vcf 135 5 | -------------------------------------------------------------------------------- /test/output/vcf_single_sampleName_genotype.txt: -------------------------------------------------------------------------------- 1 | sample_name filename num_genotypes sample_genotype_fields 2 | SAMP1 vcf/SAMP1.vcf 289 GT 3 | SAMP2 vcf/SAMP2.vcf 288 GT 4 | -------------------------------------------------------------------------------- /test/phenotype/phenotype.txt: -------------------------------------------------------------------------------- 1 | sample_name aff sex BMI 2 | NA06985 2 F 19.64 3 | NA06986 1 M None 4 | NA06994 1 F 19.49 5 | NA07000 2 F 21.52 6 | NA07037 2 F 23.05 7 | NA07051 1 F 21.01 8 | NA07346 1 F 18.93 9 | NA07347 2 M 19.2 10 | NA07357 2 M 20.61 11 | NA10847 2 M 14.6 12 | NA10851 2 M 22.28 13 | NA11829 1 M 20.58 14 | NA11830 1 F 16.64 15 | NA11831 1 F 18.79 16 | NA11832 2 F 19.74 17 | NA11840 2 M 18.82 18 | NA11881 2 F 17.79 19 | NA11894 2 F 19.22 20 | NA11918 2 F 25.56 21 | NA11919 1 F 22.28 22 | NA11920 2 F 23.18 23 | NA11931 2 F 16.15 24 | NA11992 2 F 20.41 25 | NA11993 2 F 17.09 26 | NA11994 2 M 23.74 
27 | NA11995 1 F 17.15 28 | NA12003 2 M 24.13 29 | NA12004 2 F 20.31 30 | NA12005 2 M 17.73 31 | NA12006 2 M 19.99 32 | NA12043 2 F 21.91 33 | NA12044 1 M 21.07 34 | NA12045 1 F 23.73 35 | NA12144 1 F 21.15 36 | NA12154 2 M 22.35 37 | NA12155 1 F 21.39 38 | NA12156 1 F 21.67 39 | NA12234 2 F 15.31 40 | NA12249 2 M 21.36 41 | NA12287 2 F 24.58 42 | NA12414 1 F 20.67 43 | NA12489 1 F 20.29 44 | NA12716 1 F 16.92 45 | NA12717 1 F 20.38 46 | NA12749 1 F 21.25 47 | NA12750 1 F 17.7 48 | NA12751 1 F 25.97 49 | NA12760 2 F 23.35 50 | NA12761 2 M 20.71 51 | NA12762 2 M 20.3 52 | NA12763 1 M 19.6 53 | NA12776 2 M 21.01 54 | NA12812 2 F 18.22 55 | NA12813 2 M 18.16 56 | NA12814 1 F 24.41 57 | NA12815 2 M 17.33 58 | NA12828 1 M 18.69 59 | NA12872 2 M 21.15 60 | NA12873 2 F 20.32 61 | NA12874 1 M 19.41 62 | SAMP1 1 M 22.78 63 | SAMP2 2 F 24.43 64 | -------------------------------------------------------------------------------- /test/plink/dat1.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/plink/dat1.bed -------------------------------------------------------------------------------- /test/plink/dat1.fam: -------------------------------------------------------------------------------- 1 | 1 1 0 0 0 -9 2 | 2 2 0 0 0 -9 3 | 3 3 0 0 0 -9 4 | 4 4 0 0 0 -9 5 | 5 5 0 0 0 -9 6 | 6 6 0 0 0 -9 7 | 7 7 0 0 0 -9 8 | 8 8 0 0 0 -9 9 | 9 9 0 0 0 -9 10 | 10 10 0 0 0 -9 11 | 11 11 0 0 0 -9 12 | 12 12 0 0 0 -9 13 | 13 13 0 0 0 -9 14 | 14 14 0 0 0 -9 15 | 15 15 0 0 0 -9 16 | 16 16 0 0 0 -9 17 | 17 17 0 0 0 -9 18 | 18 18 0 0 0 -9 19 | 19 19 0 0 0 -9 20 | 20 20 0 0 0 -9 21 | 21 21 0 0 0 -9 22 | 22 22 0 0 0 -9 23 | 23 23 0 0 0 -9 24 | 24 24 0 0 0 -9 25 | 25 25 0 0 0 -9 26 | 26 26 0 0 0 -9 27 | 27 27 0 0 0 -9 28 | 28 28 0 0 0 -9 29 | 29 29 0 0 0 -9 30 | 30 30 0 0 0 -9 31 | 31 31 0 0 0 -9 32 | 32 32 0 0 0 -9 33 | 33 33 0 0 0 -9 34 | 34 34 0 0 0 -9 35 | 35 35 0 0 
0 -9 36 | 36 36 0 0 0 -9 37 | 37 37 0 0 0 -9 38 | 38 38 0 0 0 -9 39 | 39 39 0 0 0 -9 40 | 40 40 0 0 0 -9 41 | 41 41 0 0 0 -9 42 | 42 42 0 0 0 -9 43 | 43 43 0 0 0 -9 44 | 44 44 0 0 0 -9 45 | 45 45 0 0 0 -9 46 | 46 46 0 0 0 -9 47 | 47 47 0 0 0 -9 48 | 48 48 0 0 0 -9 49 | 49 49 0 0 0 -9 50 | 50 50 0 0 0 -9 51 | 51 51 0 0 0 -9 52 | 52 52 0 0 0 -9 53 | 53 53 0 0 0 -9 54 | 54 54 0 0 0 -9 55 | 55 55 0 0 0 -9 56 | 56 56 0 0 0 -9 57 | 57 57 0 0 0 -9 58 | 58 58 0 0 0 -9 59 | 59 59 0 0 0 -9 60 | 60 60 0 0 0 -9 61 | 61 61 0 0 0 -9 62 | 62 62 0 0 0 -9 63 | 63 63 0 0 0 -9 64 | 64 64 0 0 0 -9 65 | 65 65 0 0 0 -9 66 | 66 66 0 0 0 -9 67 | 67 67 0 0 0 -9 68 | 68 68 0 0 0 -9 69 | 69 69 0 0 0 -9 70 | 70 70 0 0 0 -9 71 | 71 71 0 0 0 -9 72 | 72 72 0 0 0 -9 73 | 73 73 0 0 0 -9 74 | 74 74 0 0 0 -9 75 | 75 75 0 0 0 -9 76 | 76 76 0 0 0 -9 77 | 77 77 0 0 0 -9 78 | 78 78 0 0 0 -9 79 | 79 79 0 0 0 -9 80 | 80 80 0 0 0 -9 81 | 81 81 0 0 0 -9 82 | 82 82 0 0 0 -9 83 | 83 83 0 0 0 -9 84 | 84 84 0 0 0 -9 85 | 85 85 0 0 0 -9 86 | 86 86 0 0 0 -9 87 | 87 87 0 0 0 -9 88 | 88 88 0 0 0 -9 89 | 89 89 0 0 0 -9 90 | 90 90 0 0 0 -9 91 | 91 91 0 0 0 -9 92 | 92 92 0 0 0 -9 93 | 93 93 0 0 0 -9 94 | 94 94 0 0 0 -9 95 | 95 95 0 0 0 -9 96 | 96 96 0 0 0 -9 97 | 97 97 0 0 0 -9 98 | 98 98 0 0 0 -9 99 | 99 99 0 0 0 -9 100 | 100 100 0 0 0 -9 101 | -------------------------------------------------------------------------------- /test/proj/assoproj.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/proj/assoproj.tar.gz -------------------------------------------------------------------------------- /test/run_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # $File: ProcessTestCase $ 4 | # $LastChangedDate: 2011-06-16 20:10:41 -0500 (Thu, 16 Jun 2011) $ 5 | # $Rev: 4234 $ 6 | # 7 | # This file is part of 
variant_tools, a software application to annotate, 8 | # summarize, and filter variants for next-gen sequencing ananlysis. 9 | # Please visit http://varianttools.sourceforge.net for details. 10 | # 11 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org) 12 | # 13 | # This program is free software: you can redistribute it and/or modify 14 | # it under the terms of the GNU General Public License as published by 15 | # the Free Software Foundation, either version 3 of the License, or 16 | # (at your option) any later version. 17 | # 18 | # This program is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | # GNU General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU General Public License 24 | # along with this program. If not, see . 25 | # 26 | 27 | import os 28 | import re 29 | import unittest 30 | import sys 31 | 32 | def importTests(): 33 | tests = unittest.TestSuite() 34 | for file in os.listdir('.'): 35 | match = re.match("^(test_(.*))\\.py$", file) 36 | if match: 37 | m = match.group(1) 38 | print("Adding test cases in %s" % m) 39 | module = __import__(m) 40 | tests.addTest(unittest.defaultTestLoader.loadTestsFromModule( module )) 41 | return tests 42 | 43 | if __name__ == '__main__': 44 | test_runner = unittest.TextTestRunner(verbosity=2) 45 | #test_runner.run(importTests()) 46 | ret = test_runner.run(importTests()) 47 | if ret.errors or ret.failures: 48 | sys.exit('test fail') 49 | -------------------------------------------------------------------------------- /test/txt/ANNOVAR.txt: -------------------------------------------------------------------------------- 1 | 1 161003087 161003087 C T comments: rs1000050, a SNP in Illumina SNP arrays 2 | 1 84647761 84647761 C T comments: rs6576700 or SNP_A-1780419, a SNP in Affymetrix SNP arrays 3 | 1 13133880 13133881 TC - 
comments: rs59770105, a 2-bp deletion 4 | 1 11326183 11326183 - AT comments: rs35561142, a 2-bp insertion 5 | 1 105293754 105293754 A ATAAA comments: rs10552169, a block substitution 6 | 1 67478546 67478546 G A comments: rs11209026 (R381Q), a SNP in IL23R associated with Crohn's disease 7 | 2 233848107 233848107 T C comments: rs2241880 (T300A), a SNP in the ATG16L1 associated with Crohn's disease 8 | 16 49303427 49303427 C T comments: rs2066844 (R702W), a non-synonymous SNP in NOD2 9 | 16 49314041 49314041 G C comments: rs2066845 (G908R), a non-synonymous SNP in NOD2 10 | 16 49321279 49321279 - C comments: rs2066847 (c.3016_3017insC), a frameshift SNP in NOD2 11 | 13 19661686 19661686 G - comments: rs1801002 (del35G), a frameshift mutation in GJB2, associated with hearing loss 12 | 13 19695176 20003944 0 - comments: a 342kb deletion encompassing GJB6, associated with hearing loss 13 | -------------------------------------------------------------------------------- /test/txt/CASAVA18_SNP.txt: -------------------------------------------------------------------------------- 1 | # ** CASAVA depth-filtered snp calls ** 2 | #$ CMDLINE /CASAVA-1.8.0a19/filterSmallVariants.pl --chrom=chr1 3 | #$ SEQ_MAX_DEPTH chr1 142.345267150165 4 | # 5 | #$ COLUMNS seq_name pos bcalls_used bcalls_filt ref Q(snp) max_gt Q(max_gt) max_gt|poly_site Q(max_gt|poly_site) A_used C_used G_used T_used 6 | chr1 10231 5 9 C 28 AC 28 AC 59 3 2 0 0 7 | chr1 10255 14 29 A 1 AA 9 AT 25 12 0 0 2 8 | chr1 10264 15 19 C 18 AC 18 AC 51 4 11 0 0 9 | chr1 10291 2 16 C 1 CC 10 CT 21 0 1 0 1 10 | chr1 10330 3 14 C 2 CC 5 AC 28 2 1 0 0 11 | chr1 13273 9 0 G 58 CG 54 CG 57 0 6 3 0 12 | chr1 14464 18 0 A 60 AT 60 AT 93 12 0 0 6 13 | chr1 14673 19 0 G 63 CG 63 CG 96 0 8 11 0 14 | chr1 14699 23 0 C 72 CG 72 CG 105 0 14 9 0 15 | chr1 14907 13 0 A 118 AG 65 AG 65 4 0 9 0 16 | chr1 14930 14 2 A 119 AG 68 AG 68 5 0 9 0 17 | chr1 14933 14 2 G 78 AG 78 AG 110 6 0 8 0 18 | chr1 14976 4 0 G 18 AG 18 AG 47 2 0 2 0 19 | 
chr1 15211 2 0 T 37 GG 5 GG 5 0 0 2 0 20 | chr1 15817 1 0 G 11 GT 3 GT 3 0 0 0 1 21 | chr1 15820 1 0 G 11 GT 3 GT 3 0 0 0 1 22 | chr1 16487 12 0 T 62 CT 62 CT 94 0 6 0 6 23 | chr1 17538 64 0 C 88 AC 88 AC 121 18 46 0 0 24 | chr1 17746 53 1 A 22 AG 22 AG 55 39 0 14 0 25 | chr1 17765 47 1 G 26 AG 26 AG 59 13 0 34 0 26 | chr1 20131 1 0 G 8 CG 2 CG 3 0 1 0 0 27 | chr1 20144 1 0 G 9 AG 2 AG 3 1 0 0 0 28 | chr1 20206 2 0 C 4 CT 4 CT 30 0 1 0 1 29 | chr1 20245 3 0 G 4 AG 4 AG 34 1 0 2 0 30 | chr1 20304 2 0 G 2 GG 5 CG 27 0 1 1 0 31 | -------------------------------------------------------------------------------- /test/txt/CGA.tsv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/txt/CGA.tsv.bz2 -------------------------------------------------------------------------------- /test/txt/assoc.fmt: -------------------------------------------------------------------------------- 1 | [format description] 2 | description=Input format for variants with multiple sample genotypes. 3 | delimiter=None 4 | variant=chr,%(pos)s,%(ref)s,%(alt)s 5 | genotype=%(geno)s 6 | 7 | [DEFAULT] 8 | pos=pos 9 | pos_comment=Field for position. 10 | 11 | ref=ref 12 | ref_comment=Field for reference allele. 13 | 14 | alt=alt 15 | alt_comment=Field for alternative allele. 16 | 17 | geno=GT 18 | geno_comment=Field to extract genotype. 
19 | 20 | [chr] 21 | index=1 22 | type=VARCHAR(20) 23 | adj=RemoveLeading('chr') 24 | comment=Chromosome 25 | 26 | [pos] 27 | index=2 28 | type=INTEGER NOT NULL 29 | comment=1-based Position of the snp 30 | 31 | [ref] 32 | index=3 33 | type=VARCHAR(255) 34 | comment=Reference allele 35 | 36 | [alt] 37 | index=4 38 | type=VARCHAR(255) 39 | comment=Alternative allele 40 | 41 | [GT] 42 | index=5: 43 | type=INTEGER 44 | comment=Gentoype w/ 0,1,2 codings 45 | -------------------------------------------------------------------------------- /test/txt/complteGenomics.tsv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/txt/complteGenomics.tsv.bz2 -------------------------------------------------------------------------------- /test/txt/invalid.tsv: -------------------------------------------------------------------------------- 1 | CHR POS START REF ALT 2 | 1 75927 86064 G C 3 | 1 75927 86064 G 4 | 1 76193 86330 A G 5 | 1 77052 87189 G A 6 | 1 78178 88315 G A 7 | 1 78200 88337 G A 8 | 1 81398 91535 G T 9 | 1 98172 108309 T C 10 | 1 223335 233472 C G 11 | 1 224622 234759 A T 12 | 1 225791 235928 G A 13 | -------------------------------------------------------------------------------- /test/txt/pileup.indel: -------------------------------------------------------------------------------- 1 | chr10 57162 D1 G * homo 26 3 6 2 | chr10 62899 I4 AAAA * hete 31 17 33 3 | chr10 85429 I1 A * homo 38 29 32 4 | chr10 87126 I24 TGCATTTACGTGATCTTGGCTCAC * hete 51 10 38 5 | chr10 87668 D3 CTC * hete 52 27 34 6 | chr10 89301 D1 A * hete 37 7 45 7 | chr10 89448 I3 AGG * hete 27 4 31 8 | chr10 93681 I1 G * hete 21 12 111 9 | chr10 94117 I3 CAA * hete 29 29 81 10 | chr10 94848 D3 TTA * hete 54 7 48 11 | chr10 95775 I1 T * hete 44 6 44 12 | chr10 97572 D1 T * hete 44 8 55 13 | chr10 98719 I1 T * hete 48 13 39 14 | chr10 99022 I1 T * homo 52 19 36 15 | chr10 
100224 D6 CCCTAA * hete 41 12 31 16 | chr10 100433 D6 ACCCTC * hete 50 2 20 17 | chr10 100799 I1 G * hete 50 4 22 18 | chr10 101382 D1 G * hete 54 12 39 19 | chr10 101729 D3 GTA * hete 51 19 58 20 | chr10 103093 D1 T * homo 57 23 33 21 | chr10 103731 D2 GA * hete 46 6 28 22 | chr10 106207 D9 TTGTTTTTG * hete 46 6 24 23 | chr10 106216 D4 TTTT * homo 49 11 19 24 | chr10 107344 I1 C * hete 54 6 32 25 | chr10 108119 I1 G * hete 31 7 19 26 | chr10 108176 I1 A * hete 46 3 22 27 | chr10 110565 D2 AA * hete 47 4 11 28 | chr10 110582 D2 AG * hete 51 2 13 29 | chr10 110806 D7 TTTTTTT * hete 55 5 14 30 | chr10 110829 I3 GGG * hete 45 2 13 31 | -------------------------------------------------------------------------------- /test/txt/sample_1_chr22.txt: -------------------------------------------------------------------------------- 1 | #chr pos end ref all1 all2 dbsnp SMP1 LS SMP2 LS SMP3 LS 2 | 22 16123379 16123379 A A G rs74370004 AA 10 AG 6 AG 25 3 | 22 16123425 16123425 T T C rs79052403 TT 9 CT 2 TT 14 4 | 22 16123469 16123469 T T G GG 4 GG 7 GT 16 5 | 22 16123488 16123488 G G T GG 5 GG 12 GG 14 6 | 22 16123496 16123496 G G A GG 6 GG 14 GG 15 7 | 22 16123524 16123524 A A C AA 4 AA 2 AA 14 8 | 22 16123531 16123531 G G T rs62226612 GG 5 GG 11 GG 13 9 | 22 16123762 16123762 C C G CC 33 CC 48 CC 25 10 | 22 16123793 16123793 G G A AG 11 AG 8 AG 21 11 | -------------------------------------------------------------------------------- /test/txt/sample_chr22.txt: -------------------------------------------------------------------------------- 1 | #chr pos end ref all1 all2 dbsnp SMP1 LS SMP2 LS SMP3 LS 2 | 22 16060526 16060526 T TCT - TT 3 T- 4 -- 4 3 | 22 16078617 16078618 TG TG - TT 0 T- 4 TT 0 4 | 22 16123379 16123379 A A G rs74370004 AA 10 AG 6 AG 25 5 | 22 16123409 16123410 - - G GG 10 GG 23 G- 14 6 | 22 16123425 16123425 T T C rs79052403 TT 9 CT 2 TT 14 7 | 22 16404838 16404839 - - GA GG 3 AG 5 GG 4 8 | 
-------------------------------------------------------------------------------- /test/txt/variants.txt: -------------------------------------------------------------------------------- 1 | 1 203148112 T - 2 | 1 203148168 G A 3 | 1 203148202 G C 4 | 1 203148224 G A 5 | 1 203148265 GG T 6 | 1 203148284 T C 7 | 1 203148294 G T 8 | 1 203148359 C A 9 | 1 203148360 G A 10 | 1 203148360 G C 11 | 1 203148510 G T 12 | 1 203148513 A T 13 | 1 203148633 A G 14 | 1 203148677 T C 15 | 1 203148727 C T 16 | 1 203148868 T C 17 | 1 203148989 - C 18 | 10 58118181 A C 19 | 10 58118185 C T 20 | 10 58120990 C T 21 | -------------------------------------------------------------------------------- /test/vcf/CEU.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/CEU.vcf.gz -------------------------------------------------------------------------------- /test/vcf/CEU.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/CEU.vcf.gz.tbi -------------------------------------------------------------------------------- /test/vcf/CEU_dup.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/CEU_dup.vcf.gz -------------------------------------------------------------------------------- /test/vcf/EMPTY.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.0 2 | ##FILTER= 3 | ##FORMAT= 4 | ##FORMAT= 5 | ##FORMAT= 6 | ##FORMAT= 7 | ##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null 
reference_sequence=human_g1k_v37.fasta rodBind=[/tmp/0.251173662095429.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub" 8 | ##INFO= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##INFO= 14 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP2 15 | -------------------------------------------------------------------------------- /test/vcf/chromX.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/chromX.vcf.gz -------------------------------------------------------------------------------- /test/vcf/compare.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.0 2 | ##INFO= 3 | ##INFO= 4 | ##INFO= 5 | ##INFO= 6 | ##INFO= 7 | ##INFO= 8 | ##FILTER= 9 | ##FORMAT= 10 | ##FORMAT= 11 | ##FORMAT= 12 | ##FORMAT= 13 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP1 SAMP2 14 | 1 31705 rs3843004 A G 23 PASS DP=9;NS=1 GT 1/1 ./. 15 | 1 50195 rs62637816 T C,G 99 PASS DP=9;NS=1 GT 0/2 0/2 16 | 1 50589 rs2531295 C A 29 PASS DP=4;NS=1 GT ./. 
0/1 17 | -------------------------------------------------------------------------------- /test/vcf/hdf5_test.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/hdf5_test.h5 --------------------------------------------------------------------------------