├── .gitignore
├── .gitlab-ci.yml
├── .gitlab
└── issue_templates
│ └── bug.md
├── .pre-commit-config.yaml
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── development
├── Linux
│ ├── extractor-template.sh
│ └── install.sh
├── MacOSX
│ ├── INSTALL
│ ├── postinstall.sh
│ └── variant_tools.pmdoc
│ │ ├── 01variant-contents.xml
│ │ ├── 01variant.xml
│ │ └── index.xml
├── conda
│ ├── build.sh
│ └── meta.yaml
├── docker
│ └── Dockerfile
├── docker_ci
│ └── Dockerfile
├── manage_resource.py
├── md5_annoDB.py
└── monitor.py
├── resources
├── annotation
│ ├── CancerGeneCensus-20111215.ann
│ ├── CancerGeneCensus-20120315.ann
│ ├── CancerGeneCensus-20130711.ann
│ ├── CancerGeneCensus-20170912.ann
│ ├── CosmicCodingMuts-v61_260912.ann
│ ├── CosmicCodingMuts-v67_20131024.ann
│ ├── CosmicCodingMuts-v82_20170801.ann
│ ├── CosmicMutantExport-v61_260912.ann
│ ├── CosmicMutantExport-v67_241013.ann
│ ├── CosmicMutantExport-v82_20170803.ann
│ ├── CosmicNonCodingVariants-v61_260912.ann
│ ├── CosmicNonCodingVariants-v67_241013.ann
│ ├── CosmicNonCodingVariants-v82_20170801.ann
│ ├── DGV-hg18_20130723.ann
│ ├── DGV-hg19_20130723.ann
│ ├── DGV-hg19_20160515.ann
│ ├── DGV-hg19_20160831.ann
│ ├── DGV-hg38_20160831.ann
│ ├── ESP-6500SI-V2-SSA137.ann
│ ├── EntrezGene-20131028.ann
│ ├── EntrezGene-20170919.ann
│ ├── EntrezGene2RefSeq-20131028.ann
│ ├── EntrezGene2RefSeq-20170919.ann
│ ├── ExAC-hg19_r0.2.ann
│ ├── ExAC.ann
│ ├── HGNC-20131029.ann
│ ├── HGNC-20170920.ann
│ ├── Illumina_NRCE-20130307.ann
│ ├── LCR-hg19_20090320.ann
│ ├── ccdsGene-hg19_20110909.ann
│ ├── ccdsGene-hg19_20111206.ann
│ ├── ccdsGene-hg19_20130904.ann
│ ├── ccdsGene-hg38_20171008.ann
│ ├── ccdsGene_exon-hg19_20110909.ann
│ ├── ccdsGene_exon-hg19_20111206.ann
│ ├── ccdsGene_exon-hg19_20130904.ann
│ ├── ccdsGene_exon-hg38_20171008.ann
│ ├── ccdsGene_exon_hg19-20111206.ann
│ ├── ccdsGene_exon_hg38-20171008.ann
│ ├── ccdsGene_hg19-20111206.ann
│ ├── clinvar-20150804.ann
│ ├── clinvar-20150929.ann
│ ├── clinvar-20160104.ann
│ ├── clinvar-20160107.ann
│ ├── clinvar-20171002.ann
│ ├── cytoBand-hg18_20111216.ann
│ ├── cytoBand-hg19_20111216.ann
│ ├── cytoBand-hg38_20140810.ann
│ ├── dbNSFP-hg18_hg19_1.1_2.ann
│ ├── dbNSFP-hg18_hg19_1_3.ann
│ ├── dbNSFP-hg18_hg19_2_0.ann
│ ├── dbNSFP-hg18_hg19_2_0b4.ann
│ ├── dbNSFP-hg18_hg19_2_1.ann
│ ├── dbNSFP-hg18_hg19_2_3.ann
│ ├── dbNSFP-hg18_hg19_2_4.ann
│ ├── dbNSFP-hg18_hg19_2_7.ann
│ ├── dbNSFP-hg18_hg19_2_9.ann
│ ├── dbNSFP-hg38_3_5a.ann
│ ├── dbNSFP_gene-2_0.ann
│ ├── dbNSFP_gene-2_1.ann
│ ├── dbNSFP_gene-2_3.ann
│ ├── dbNSFP_gene-2_4.ann
│ ├── dbNSFP_gene-2_7.ann
│ ├── dbNSFP_gene-3_5a.ann
│ ├── dbNSFP_light-hg18_hg19_1.0_0.ann
│ ├── dbNSFP_light-hg18_hg19_1_3.ann
│ ├── dbSNP-hg18_129.ann
│ ├── dbSNP-hg18_130.ann
│ ├── dbSNP-hg19_131.ann
│ ├── dbSNP-hg19_132.ann
│ ├── dbSNP-hg19_135-1.ann
│ ├── dbSNP-hg19_135.ann
│ ├── dbSNP-hg19_137.ann
│ ├── dbSNP-hg19_138.ann
│ ├── dbSNP-hg38_143.ann
│ ├── dbscSNV-hg19_20141120.ann
│ ├── dbscSNV-hg38_20150412.ann
│ ├── genomicSuperDups-hg19_20130626.ann
│ ├── genomicSuperDups-hg38_20141018.ann
│ ├── gwasCatalog-hg19_20111220.ann
│ ├── gwasCatalog-hg19_20140112.ann
│ ├── gwasCatalog-hg38_20171004.ann
│ ├── hapmap_ASW_freq-hg18_20100817.ann
│ ├── hapmap_CEU_freq-hg18_20100817.ann
│ ├── hapmap_CHB_freq-hg18_20100817.ann
│ ├── hapmap_CHD_freq-hg18_20100817.ann
│ ├── hapmap_GIH_freq-hg18_20100817.ann
│ ├── hapmap_JPT_freq-hg18_20100817.ann
│ ├── hapmap_LWK_freq-hg18_20100817.ann
│ ├── hapmap_MEX_freq-hg18_20100817.ann
│ ├── hapmap_MKK_freq-hg18_20100817.ann
│ ├── hapmap_TSI_freq-hg18_20100817.ann
│ ├── hapmap_YRI_freq-hg18_20100817.ann
│ ├── keggPathway-20110823.ann
│ ├── knownGene-hg18_20110909.ann
│ ├── knownGene-hg18_20121219.ann
│ ├── knownGene-hg19_20110909.ann
│ ├── knownGene-hg19_20121219.ann
│ ├── knownGene-hg19_20130904.ann
│ ├── knownGene-hg38_20160328.ann
│ ├── knownGene_exon-hg18_20110909.ann
│ ├── knownGene_exon-hg19_20110909.ann
│ ├── knownGene_exon-hg19_20130904.ann
│ ├── knownGene_exon-hg38_20160328.ann
│ ├── phastCons-hg19_20110909.ann
│ ├── phastCons-hg19_20130322.ann
│ ├── phastCons-hg38_20150913.ann
│ ├── phastConsElements-hg19_20130622.ann
│ ├── phastConsElements-hg38_20150913.ann
│ ├── refGene-hg18_20110909.ann
│ ├── refGene-hg19_20110909.ann
│ ├── refGene-hg19_20130904.ann
│ ├── refGene-hg38_20170201.ann
│ ├── refGene-hg38_20171008.ann
│ ├── refGene-mm10_20141201.ann
│ ├── refGene_exon-hg18_20110909.ann
│ ├── refGene_exon-hg19_20110909.ann
│ ├── refGene_exon-hg19_20130904.ann
│ ├── refGene_exon-mm10_20141201.ann
│ ├── refGene_exon-mm10_20171008.ann
│ ├── thousandGenomes-hg19_v3_20101123.ann
│ └── thousandGenomes-hg19_v5b_20130502.ann
├── format
│ ├── ANNOVAR.fmt
│ ├── ANNOVAR_exonic_variant_function.fmt
│ ├── ANNOVAR_variant_function.fmt
│ ├── CASAVA18_indels.fmt
│ ├── CASAVA18_snps.fmt
│ ├── CGA.fmt
│ ├── MAF.fmt
│ ├── basic.fmt
│ ├── csv.fmt
│ ├── map.fmt
│ ├── pileup_indel.fmt
│ ├── plink.fmt
│ ├── polyphen2.fmt
│ ├── rsname.fmt
│ ├── tped.fmt
│ ├── twoalleles.fmt
│ └── vcf.fmt
├── pipeline
│ ├── ANNOVAR.pipeline
│ ├── DNASeq_tools.py
│ ├── KING.pipeline
│ ├── anno_utils.pipeline
│ ├── bwa_gatk28_b37.pipeline
│ ├── bwa_gatk28_hg19.pipeline
│ ├── bwa_gatk33_b37.pipeline
│ ├── bwa_gatk33_hg19.pipeline
│ ├── illumina.pipeline
│ ├── import_vcf.pipeline
│ ├── mosaik_gatk23_align.pipeline
│ ├── snpEff.pipeline
│ └── transmission.pipeline
└── simulation
│ ├── Lineage.pipeline
│ ├── Lineage.py
│ ├── Peng2011_srv.pipeline
│ ├── Peng2014_ex1.pipeline
│ ├── Peng2014_ex2.pipeline
│ └── VST_srv.py
├── setup.py
├── src
├── cgatools
│ ├── core.hpp
│ ├── reference
│ │ ├── ChromosomeIdField.cpp
│ │ ├── ChromosomeIdField.hpp
│ │ ├── CompactDnaSequence.cpp
│ │ ├── CompactDnaSequence.hpp
│ │ ├── CrrFile.cpp
│ │ ├── CrrFile.hpp
│ │ ├── CrrFileWriter.cpp
│ │ ├── CrrFileWriter.hpp
│ │ ├── GeneDataStore.cpp
│ │ ├── GeneDataStore.hpp
│ │ ├── RangeAnnotationStore.hpp
│ │ ├── RepeatMaskerStore.hpp
│ │ └── range.hpp
│ └── util
│ │ ├── BaseUtil.cpp
│ │ ├── BaseUtil.hpp
│ │ ├── DelimitedFile.cpp
│ │ ├── DelimitedFile.hpp
│ │ ├── DelimitedLineParser.cpp
│ │ ├── DelimitedLineParser.hpp
│ │ ├── Exception.cpp
│ │ ├── Exception.hpp
│ │ ├── GenericHistogram.cpp
│ │ ├── GenericHistogram.hpp
│ │ ├── IndirectComparator.hpp
│ │ ├── Md5.cpp
│ │ ├── Md5.hpp
│ │ ├── RangeIntersector.hpp
│ │ ├── RangeSet.cpp
│ │ ├── RangeSet.hpp
│ │ ├── Streams.cpp
│ │ ├── Streams.hpp
│ │ ├── StringSet.cpp
│ │ ├── StringSet.hpp
│ │ ├── parse.cpp
│ │ └── parse.hpp
├── code_style.cfg
├── hdf5-blosc
│ ├── blosc_filter.c
│ ├── blosc_filter.h
│ ├── blosc_plugin.c
│ └── blosc_plugin.h
├── libplinkio
│ ├── COPYING
│ ├── LICENSE
│ ├── bed.c
│ ├── bed.h
│ ├── bed_header.c
│ ├── bed_header.h
│ ├── bim.c
│ ├── bim.h
│ ├── bim_parse.c
│ ├── bim_parse.h
│ ├── common.h
│ ├── cplinkio.c
│ ├── csv.h
│ ├── fam.c
│ ├── fam.h
│ ├── fam_parse.c
│ ├── fam_parse.h
│ ├── file.c
│ ├── file.h
│ ├── libcsv.c
│ ├── plinkio.c
│ ├── plinkio.h
│ ├── snp_lookup.h
│ ├── snp_lookup_big.h
│ ├── snp_lookup_little.h
│ ├── snparray.c
│ ├── snparray.h
│ ├── status.h
│ └── utarray.h
├── rext
│ ├── MetaSKAT.PFF.VAT.R
│ └── MetaSKAT.VAT.R
├── sqlite
│ ├── py2
│ │ ├── cache.c
│ │ ├── cache.h
│ │ ├── connection.c
│ │ ├── connection.h
│ │ ├── cursor.c
│ │ ├── cursor.h
│ │ ├── microprotocols.c
│ │ ├── microprotocols.h
│ │ ├── module.c
│ │ ├── module.h
│ │ ├── prepare_protocol.c
│ │ ├── prepare_protocol.h
│ │ ├── row.c
│ │ ├── row.h
│ │ ├── sqlitecompat.h
│ │ ├── statement.c
│ │ ├── statement.h
│ │ ├── util.c
│ │ └── util.h
│ ├── py3
│ │ ├── cache.c
│ │ ├── cache.h
│ │ ├── connection.c
│ │ ├── connection.h
│ │ ├── cursor.c
│ │ ├── cursor.h
│ │ ├── microprotocols.c
│ │ ├── microprotocols.h
│ │ ├── module.c
│ │ ├── module.h
│ │ ├── prepare_protocol.c
│ │ ├── prepare_protocol.h
│ │ ├── row.c
│ │ ├── row.h
│ │ ├── sqlitecompat.h
│ │ ├── statement.c
│ │ ├── statement.h
│ │ ├── util.c
│ │ └── util.h
│ ├── shell.c
│ ├── sqlite3.c
│ ├── sqlite3.h
│ ├── sqlite3ext.h
│ └── vt_sqlite3_ext.cpp
├── swigpyrun.h
├── ucsc
│ ├── inc
│ │ ├── aliType.h
│ │ ├── asParse.h
│ │ ├── bPlusTree.h
│ │ ├── bamFile.h
│ │ ├── base64.h
│ │ ├── basicBed.h
│ │ ├── bbiFile.h
│ │ ├── bigBed.h
│ │ ├── bigWig.h
│ │ ├── binRange.h
│ │ ├── bits.h
│ │ ├── bwgInternal.h
│ │ ├── cheapcgi.h
│ │ ├── cirTree.h
│ │ ├── common.h
│ │ ├── dlist.h
│ │ ├── dnaseq.h
│ │ ├── dnautil.h
│ │ ├── dystring.h
│ │ ├── errabort.h
│ │ ├── filePath.h
│ │ ├── fuzzyFind.h
│ │ ├── gfxPoly.h
│ │ ├── hash.h
│ │ ├── hmmstats.h
│ │ ├── htmshell.h
│ │ ├── https.h
│ │ ├── internet.h
│ │ ├── kxTok.h
│ │ ├── linefile.h
│ │ ├── localmem.h
│ │ ├── memalloc.h
│ │ ├── memgfx.h
│ │ ├── mime.h
│ │ ├── net.h
│ │ ├── obscure.h
│ │ ├── options.h
│ │ ├── pipeline.h
│ │ ├── portable.h
│ │ ├── psl.h
│ │ ├── rangeTree.h
│ │ ├── rbTree.h
│ │ ├── regexHelper.h
│ │ ├── sig.h
│ │ ├── sqlList.h
│ │ ├── sqlNum.h
│ │ ├── tokenizer.h
│ │ ├── udc.h
│ │ ├── vcf.h
│ │ ├── verbose.h
│ │ └── zlibFace.h
│ ├── lib
│ │ ├── aliType.c
│ │ ├── asParse.c
│ │ ├── bPlusTree.c
│ │ ├── bamFile.c
│ │ ├── base64.c
│ │ ├── basicBed.c
│ │ ├── bbiRead.c
│ │ ├── bigBed.c
│ │ ├── binRange.c
│ │ ├── bits.c
│ │ ├── bwgQuery.c
│ │ ├── cheapcgi.c
│ │ ├── cirTree.c
│ │ ├── common.c
│ │ ├── dlist.c
│ │ ├── dnautil.c
│ │ ├── dystring.c
│ │ ├── errabort.c
│ │ ├── ffAli.c
│ │ ├── filePath.c
│ │ ├── hash.c
│ │ ├── hmmstats.c
│ │ ├── htmshell.c
│ │ ├── https.c
│ │ ├── intExp.c
│ │ ├── internet.c
│ │ ├── kxTok.c
│ │ ├── linefile.c
│ │ ├── localmem.c
│ │ ├── memalloc.c
│ │ ├── mime.c
│ │ ├── net.c
│ │ ├── obscure.c
│ │ ├── osunix.c
│ │ ├── pipeline.c
│ │ ├── portimpl.c
│ │ ├── portimpl.h
│ │ ├── psl.c
│ │ ├── rangeTree.c
│ │ ├── rbTree.c
│ │ ├── regexHelper.c
│ │ ├── servBrcMcw.c
│ │ ├── servCrunx.c
│ │ ├── servcis.c
│ │ ├── servcl.c
│ │ ├── servmsII.c
│ │ ├── servpws.c
│ │ ├── sqlList.c
│ │ ├── sqlNum.c
│ │ ├── tokenizer.c
│ │ ├── udc.c
│ │ ├── vcf.c
│ │ ├── verbose.c
│ │ ├── wildcmp.c
│ │ └── zlibFace.c
│ ├── samtools
│ │ ├── bam.c
│ │ ├── bam.h
│ │ ├── bam_aux.c
│ │ ├── bam_import.c
│ │ ├── bam_index.c
│ │ ├── bam_pileup.c
│ │ ├── bgzf.c
│ │ ├── bgzf.h
│ │ ├── faidx.c
│ │ ├── faidx.h
│ │ ├── kstring.h
│ │ ├── razf.c
│ │ ├── razf.h
│ │ ├── sam.c
│ │ ├── sam.h
│ │ ├── sam_header.c
│ │ └── sam_header.h
│ └── tabix
│ │ ├── bam_endian.h
│ │ ├── bedidx.c
│ │ ├── index.c
│ │ ├── khash.h
│ │ ├── knetfile.c
│ │ ├── knetfile.h
│ │ ├── kseq.h
│ │ ├── ksort.h
│ │ ├── kstring.c
│ │ ├── kstring.h
│ │ └── tabix.h
└── variant_tools
│ ├── __init__.py
│ ├── _version.py
│ ├── accessor.py
│ ├── action.cpp
│ ├── action.h
│ ├── annotation.py
│ ├── assoData.cpp
│ ├── assoData.h
│ ├── assoTests.h
│ ├── assoTests.i
│ ├── assoTests.py
│ ├── assoTests_wrap.cpp
│ ├── association.py
│ ├── association_hdf5.py
│ ├── cgatools.i
│ ├── cgatools.py
│ ├── cgatools_wrap.cpp
│ ├── checking_asso_result.py
│ ├── compare.py
│ ├── exporter.py
│ ├── exporter_reader.py
│ ├── fisher2.c
│ ├── fisher2.h
│ ├── geno_store.py
│ ├── genotypes.c
│ ├── genotypes.h
│ ├── importer.py
│ ├── importer_allele_hdf5.py
│ ├── io_vcf_read.pyx
│ ├── liftOver.py
│ ├── lm.cpp
│ ├── lm.h
│ ├── merge_sort_parallel.py
│ ├── meta.py
│ ├── phenotype.py
│ ├── pipeline.py
│ ├── plinkfile.py
│ ├── plot.py
│ ├── preprocessor.py
│ ├── project.py
│ ├── rtester.py
│ ├── simulation.py
│ ├── site_options.py
│ ├── tester.py
│ ├── text_reader.py
│ ├── ucsctools.i
│ ├── ucsctools.py
│ ├── ucsctools_wrap.cpp
│ ├── update.py
│ ├── utils.cpp
│ ├── utils.h
│ ├── utils.py
│ ├── variant.py
│ ├── vt_sqlite3.py
│ ├── vtools.py
│ ├── vtools_association_cluster.lsf
│ ├── vtools_association_cluster.pbs
│ ├── vtools_report.py
│ └── worker_zmq.py
└── test
├── ann
├── testNSFP.DB.gz
├── testNSFP.ann
├── testNSFP.zip
├── testThousandGenomes.ann
├── testThousandGenomes.vcf.head
└── testThousandGenomes.zip
├── fmt
├── basic_hg18.fmt
├── dbSNP_hg19validation.fmt
├── genotypes.fmt
├── missing_gen.fmt
├── multi_index.fmt
├── new_format.fmt
└── randcol.fmt
├── output
├── CGA_variant.txt
├── assogrp1.txt
├── assogrp2.txt
├── assogrp3.txt
├── assogrp4.txt
├── assogrp5.txt
├── assogrp6.txt
├── assogrp7.txt
├── assogrp8.txt
├── assores1.txt
├── assores2.txt
├── assores3.txt
├── assores4.txt
├── assores5.txt
├── assores6.txt
├── assores7.txt
├── assores8.txt
├── assores_wss1.txt
├── assores_wss2.txt
├── assores_wss3.txt
├── assores_wss4.txt
├── assores_wss5.txt
├── assores_wss6.txt
├── assores_wss7.txt
├── assores_wss8.txt
├── evsVariantTest.txt
├── exclude_anno.txt
├── exclude_anno1.txt
├── exclude_select_anno.txt
├── exclude_sift.txt
├── genotype_variant_sample_output.txt
├── import_cga.txt
├── import_cga_phenotype.txt
├── import_csv.txt
├── import_customized.txt
├── import_genotype_1.txt
├── import_genotype_2.txt
├── import_mixed_build.txt
├── import_mpi_genotype.txt
├── import_mpi_genotypes.txt
├── import_mpi_multi_genotype.txt
├── import_mpi_multi_genotype_hdf5.txt
├── import_mpi_multi_genotypes.txt
├── import_mpi_multi_samples.txt
├── import_mpi_multi_variant.txt
├── import_mpi_samples.txt
├── import_mpi_variant.txt
├── import_multi_sample2_samples.txt
├── import_multi_sample2_samples_hdf5.txt
├── import_multi_sample2_variant.txt
├── import_multi_sample_samples.txt
├── import_multi_sample_samples_hdf5.txt
├── import_multi_sample_variant.txt
├── import_txt_1.txt
├── import_vcf_alt.txt
├── import_vcf_ref.txt
├── liftover.txt
├── liftover_cmp.txt
├── missing_gen.tped
├── phenotype_fields.txt
├── phenotype_import.txt
├── phenotype_phenotype_with_filename.txt
├── phenotype_phenotype_with_filename_field.txt
├── remove_field_after.txt
├── remove_field_before.txt
├── remove_genofield_after.txt
├── remove_genofield_after_hdf5.txt
├── remove_genofield_after_sqlite.txt
├── remove_genofield_before.txt
├── remove_genofield_before_hdf5.txt
├── remove_genofield_before_sqlite.txt
├── remove_phenotype.txt
├── remove_phenotype_output.txt
├── remove_phenotype_sqlite.txt
├── update_sum_stat.txt
├── use_field.txt
├── use_position.txt
├── vcf_assigned_sample_name_genotype.txt
├── vcf_multiple_sample_name.txt
├── vcf_multiple_samples_genotypes.txt
└── vcf_single_sampleName_genotype.txt
├── phenotype
├── badphenotype1.txt
├── badphenotype2.txt
├── badphenotype3.txt
├── pheno_filename.txt
└── phenotype.txt
├── plink
├── dat1.bed
├── dat1.bim
└── dat1.fam
├── proj
└── assoproj.tar.gz
├── run_tests.py
├── temp_test_import_hdf5.py
├── testUtils.py
├── test_admin.py
├── test_associate.py
├── test_avg_depth.py
├── test_compare.py
├── test_exclude.py
├── test_execute.py
├── test_export.py
├── test_func.py
├── test_import.py
├── test_init.py
├── test_liftover.py
├── test_output.py
├── test_phenotype.py
├── test_pipeline.pipeline
├── test_pipeline.py
├── test_remove.py
├── test_select.py
├── test_show.py
├── test_trans_ratio.py
├── test_update.py
├── test_use.py
├── txt
├── ANNOVAR.txt
├── CASAVA18_INDEL.txt
├── CASAVA18_SNP.txt
├── CGA.tsv.bz2
├── annovar.txt.exonic_variant_function
├── assoc.dat
├── assoc.fmt
├── assoc.phen
├── complteGenomics.tsv.bz2
├── dbSNP_hg19validation.txt
├── genotypes.txt
├── input.tsv
├── invalid.tsv
├── pileup.indel
├── sample_1_chr22.txt
├── sample_chr22.txt
├── test.csv
└── variants.txt
└── vcf
├── 500SAMP.vcf
├── CEU.vcf.gz
├── CEU.vcf.gz.tbi
├── CEU_dup.vcf.gz
├── EMPTY.vcf
├── SAMP1.vcf
├── SAMP2.vcf
├── SAMP3_complex_variants.vcf
├── SAMP4_complex_variants.vcf
├── V1.vcf
├── V2.vcf
├── V3.vcf
├── chromX.vcf.gz
├── compare.vcf
├── dup_geno.vcf
├── hdf5_test.h5
├── hdf5_test.vcf
├── input.vcf
├── input_nogeno.vcf
├── missing_gen.vcf
├── missing_gen_hdf5.vcf
├── var_format.vcf
└── with_wildtype.vcf
/.gitignore:
--------------------------------------------------------------------------------
1 | src/hdf5-blosc/.genotypes.c.swp
2 | src/hdf5-blosc/builder
3 | src/hdf5-blosc/src
4 | src/variant_tools/blosc_filter.o
5 | boost_1_49_0/
6 | src/boost_1_49_0/
7 | src/zeromq-4.0.3/
8 | test/.snapshot.info
9 | src/variant_tools/celery_main/
10 | src/variant_tools/io_vcf_read.c
11 | src/variant_tools.egg-info/
12 | test/*.log
13 | test/*.DB
14 | test/*.proj
15 | test/*.gz
16 | test/*.tfam
17 | test/*.vcf
18 | test/.vtools_cache/
19 | test/10
20 | test/ann/testNSFP.DB
21 | test/parent/
22 | test/tmp_*_genotypes.h5
23 | test/tmp_*_genotypes_multi_genes.h5
24 | test/ceu/
25 | test/sam1/
26 | build
27 | dist
28 | cgatools
29 | sqlite
30 | libplinkio
31 | ucsc
32 | *.swp
33 | build
34 | dist
35 | *.pyc
36 | *.log
37 | variant_tools/assoTests.py
38 | variant_tools/assoTests_wrap.cpp
39 | variant_tools/cgatools.py
40 | variant_tools/cgatools_wrap.cpp
41 | variant_tools/swigpyrun.h
42 | variant_tools/ucsctools.py
43 | variant_tools/ucsctools_wrap.cpp
44 | .DS_Store
45 | src/variant_tools/*.o
46 | .vscode/
47 |
--------------------------------------------------------------------------------
/.gitlab/issue_templates/bug.md:
--------------------------------------------------------------------------------
1 | # Issue template
2 | - [ ] What
3 | - [ ] How
4 | - [ ] When
5 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | repos:
4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
5 |     rev: v2.4.0
6 |     hooks:
7 |       - id: trailing-whitespace
8 |       - id: end-of-file-fixer
9 |       - id: check-yaml
10 |       - id: check-added-large-files
11 |       - id: flake8
12 |         args: ["--ignore=E501,W504,W503, E128"]
13 |   - repo: https://github.com/pre-commit/mirrors-yapf
14 |     # An empty rev cannot be checked out, so a release has to be pinned here.
15 |     # NOTE(review): v0.29.0 is presumably the last release that still accepts
16 |     # the "chromium" base style used below - confirm the intended version.
17 |     rev: v0.29.0
18 |     hooks:
19 |       - id: yapf
20 |         args: [--style, "{based_on_style:chromium,indent_width:4}"]
21 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # Travis CI configuration: the test suite runs inside a prebuilt Docker image.
2 | os:
3 |   - linux
4 | # travis does not support python on osx yet (https://github.com/travis-ci/travis-ci/issues/4729)
5 | language: python
6 | python:
7 |   # - "3.4"
8 |   - "3.5"
9 |   - "3.6"
10 |
11 | sudo: required
12 | services:
13 |   - docker
14 | # before_install:
15 | # - sudo apt-get install swig zlibc zlib1g zlib1g-dev libblas-dev liblapack-dev
16 | # install: "python setup.py install"
17 | # before_script: cd test
18 | # script:
19 | # - python run_tests.py
20 | install:
21 |   # Copy the checked-out sources into the container and install them there.
22 |   - docker pull junmahouston/vtools_test:v3
23 |   - docker run -dt --name vtools_test junmahouston/vtools_test:v3
24 |   - docker cp ./src vtools_test:/home/bpeng/VariantTools
25 |   - docker cp ./test vtools_test:/home/bpeng/VariantTools
26 |   - docker cp setup.py vtools_test:/home/bpeng/VariantTools
27 |   - docker exec vtools_test bash -c "cd VariantTools && python setup.py install"
28 | script:
29 |   - docker exec vtools_test bash -c "cd VariantTools/test && python run_tests.py"
30 |
31 | # E-mail settings are only honoured when nested under `notifications`;
32 | # a top-level `email` key is not a recognised Travis option.
33 | notifications:
34 |   email:
35 |     recipients:
36 |       - junmahouston@gmail.com
37 |     on_success: never
38 |     on_failure: always
39 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # $File: MANIFEST.in $
2 | # $LastChangedDate: 2011-06-16 20:10:41 -0500 (Thu, 16 Jun 2011) $
3 | # $Rev: 4234 $
4 | #
5 | # This file is part of variant_tools, a software application to annotate,
6 | # summarize, and filter variants for next-gen sequencing analysis.
7 | # Please visit http://varianttools.sourceforge.net for details.
8 | #
9 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
10 | #
11 | # This program is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | include setup.py
26 | include README.md
27 | include LICENSE
28 | # libplinkio
29 | recursive-include src *.py *.c *.cpp *.h *.hpp *.i *.ipp *.pyx
30 | recursive-include test *.py
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![PyPI version](https://badge.fury.io/py/variant-tools.svg)](https://badge.fury.io/py/variant-tools)
2 |
3 | # Variant Tools
4 |
5 | A command line tool for the manipulation, annotation, and analysis of genetic variants
6 | from next-generation sequencing studies.
7 |
8 | # Installation
9 |
10 | If you are using a conda environment, you can install variant tools with command
11 |
12 | ```
13 | conda install variant_tools -c bioconda -c conda-forge
14 | ```
15 | Option `-c conda-forge` is required to enforce the use of the `conda-forge` versions of dependencies (e.g. `boost-cpp`) over their counterparts in the base channel.
16 |
17 | Otherwise, you can try to install it through `pip`
18 |
19 | ```
20 | pip install variant_tools
21 | ```
22 |
23 | You will need to install
24 |
25 | * `libboost`
26 | * `gsl`
27 | * `numpy`
28 | * `Cython`
29 | * `hdf5`
30 | * `blosc`
31 | * A C++ compiler such as `gcc`
32 |
33 | which, in a conda environment, could be installed with command
34 |
35 | ```
36 | conda install -c conda-forge boost-cpp gsl numpy cython blosc hdf5
37 | ```
38 |
39 | This method can be used if you download or clone the latest version
40 | of variant tools from this repository.
41 |
42 | # Documentation
43 |
44 | Please refer to [Variant Tools documentation](https://vatlab.github.io/vat-docs/) for details.
45 |
--------------------------------------------------------------------------------
/development/Linux/install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Interactive installer for variant tools.
3 | #
4 | # Usage: install.sh SOURCE_DIR
5 | #
6 | # Prompts for an installation prefix (default /usr/local), copies the content
7 | # of SOURCE_DIR to PREFIX/lib/variant_tools and links the vtools and
8 | # vtools_report commands into PREFIX/bin.
9 | set -e
10 |
11 | # Succeed when the path given as $1 does not start with "/".
12 | is_relative() {
13 |     local path="$1"
14 |     [ "${path:0:1}" != "/" ]
15 | }
16 |
17 | # install SOURCE_DIR PREFIX
18 | # Replace PREFIX/lib/variant_tools with the content of SOURCE_DIR and recreate
19 | # the command symlinks in PREFIX/bin. All expansions are quoted so that
20 | # directories containing spaces are handled as single arguments.
21 | install () {
22 |     local src="$1"
23 |     local prefix="$2"
24 |     local cmd
25 |     mkdir -p "$prefix/bin"
26 |     rm -rf "$prefix/lib/variant_tools" &> /dev/null
27 |     mkdir -p "$prefix/lib/variant_tools"
28 |     cp -r "$src"/* "$prefix/lib/variant_tools"
29 |     for cmd in vtools vtools_report; do
30 |         rm -rf "$prefix/bin/$cmd" &> /dev/null
31 |         ln -s "$prefix/lib/variant_tools/$cmd" "$prefix/bin/$cmd"
32 |     done
33 |     echo -e "Libraries are installed to $prefix/lib\nBinary files are installed to $prefix/bin\n"
34 | }
35 |
36 | # main SOURCE_DIR
37 | # Ask for the installation prefix and run the installation.
38 | main () {
39 |     # Without a source directory the copy step would expand to "/*" and
40 |     # copy the whole root file system, so refuse to continue.
41 |     if [ -z "$1" ]; then
42 |         echo "Usage: $0 SOURCE_DIR" >&2
43 |         exit 1
44 |     fi
45 |     local fullpath=""
46 |     echo "Enter installation directory for variant tools & variant association tools: "
47 |     printf "\t [/usr/local] "
48 |     read fullpath
49 |     if [ -z "$fullpath" ]; then
50 |         install "$1" "/usr/local"
51 |     else
52 |         # NOTE(review): eval expands "~" and variables typed by the user, but
53 |         # it also executes anything else in the answer. Kept for backward
54 |         # compatibility; consider replacing it with explicit tilde expansion.
55 |         eval fullpath=$fullpath
56 |         if is_relative "$fullpath"; then
57 |             fullpath="$PWD/$fullpath"
58 |         fi
59 |         install "$1" "$fullpath"
60 |     fi
61 | }
62 | main "$@"
63 |
--------------------------------------------------------------------------------
/development/MacOSX/INSTALL:
--------------------------------------------------------------------------------
1 | This disk contains
2 |
3 | * A README file.
4 |
5 | * A MacOS X installer that installs variant tools apps to system Applications
6 | directory and commands vtools and vtools_report to /usr/local/bin. It
7 | requires root privilege.
8 |
9 | * Two self-contained executables vtools and vtools_report that can be executed
10 | directly without installation. These commands start slower than the installed
11 | version and are not recommended for general use.
12 |
--------------------------------------------------------------------------------
/development/MacOSX/postinstall.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # $File: postinstall.sh $
4 | # $LastChangedDate: 2013-04-16 13:32:03 -0500 (Tue, 16 Apr 2013) $
5 | # $Rev: 1825 $
6 | #
7 | # This file is part of variant_tools, a software application to annotate,
8 | # summarize, and filter variants for next-gen sequencing analysis.
9 | # Please visit http://varianttools.sourceforge.net for details.
10 | #
11 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
12 | #
13 | # This program is free software: you can redistribute it and/or modify
14 | # it under the terms of the GNU General Public License as published by
15 | # the Free Software Foundation, either version 3 of the License, or
16 | # (at your option) any later version.
17 | #
18 | # This program is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 | # GNU General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU General Public License
24 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 | #
26 |
27 | # install vtools and vtools_report to /usr/local/bin
28 | #
29 | if [ -f /usr/local/bin/vtools ]
30 | then
31 | /bin/rm -f /usr/local/bin/vtools
32 | fi
33 |
34 | if [ -f /usr/local/bin/vtools_report ]
35 | then
36 | /bin/rm -f /usr/local/bin/vtools_report
37 | fi
38 |
39 | /bin/ln -s /Applications/variant_tools/variant_tools.app/Contents/MacOS/vtools /usr/local/bin
40 | /bin/ln -s /Applications/variant_tools/variant_tools.app/Contents/MacOS/vtools_report /usr/local/bin
41 |
42 | exit 0
43 |
--------------------------------------------------------------------------------
/development/MacOSX/variant_tools.pmdoc/01variant.xml:
--------------------------------------------------------------------------------
1 | variant_tools.vt.pkg1.0.6../../dist/variant_tools/ApplicationsinstallTo.pathversionparentidentifierinstallToincludeRoot../../development/MacOSX/postinstall.sh01variant-contents.xml/CVS$/\.svn$/\.cvsignore$/\.cvspass$/\.DS_Store$
2 |
--------------------------------------------------------------------------------
/development/conda/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Conda recipe build step: install the package with the interpreter named by $PYTHON (presumably set by conda-build, see the link below).
3 | $PYTHON setup.py install
4 |
5 | # Add more build steps here, if they are necessary.
6 |
7 | # See
8 | # http://docs.continuum.io/conda/build.html
9 | # for a list of environment variables that are set during the build process.
10 |
--------------------------------------------------------------------------------
/development/conda/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 | name: variant_tools
3 | version: !!str 3.0.1
4 |
5 | source:
6 | fn: variant_tools-3.0.1.tar.gz
7 | url: https://pypi.python.org/packages/source/v/variant_tools/variant_tools-3.0.1.tar.gz
8 | # patches:
9 | # List any patch files here
10 | # - fix.patch
11 |
12 | # build:
13 | # preserve_egg_dir: True
14 | # entry_points:
15 | # Put any entry points (scripts to be generated automatically) here. The
16 | # syntax is module:function. For example
17 | #
18 | # - simupop = simupop:main
19 | #
20 | # Would create an entry point called simupop that calls simupop.main()
21 |
22 |
23 | # If this is a new build for the same version, increment the build
24 | # number. If you do not include this key, it defaults to 0.
25 | # number: 1
26 |
27 | requirements:
28 | build:
29 | - python # [py3k]
30 | - setuptools
31 | - numpy
32 | - cython
33 | - pyzmq
34 |
35 | run:
36 | - python # [py3k]
37 | - numpy
38 |
39 | test:
40 | # Python imports
41 | imports:
42 | - variant_tools
43 |
44 | # commands:
45 | # You can put test commands to be run here. Use this to test that the
46 | # entry points work.
47 |
48 |
49 | # You can also put a file called run_test.py in the recipe that will be run
50 | # at test time.
51 |
52 | # requires:
53 | # Put any additional test requirements here. For example
54 | # - nose
55 |
56 | about:
57 | home: http://varianttools.sourceforge.net
58 | license: GNU General Public License (GPL)
59 | summary: 'Integrated annotation and analysis of next gen sequencing data'
60 |
61 | # See
62 | # http://docs.continuum.io/conda/build.html for
63 | # more information about meta.yaml
64 |
--------------------------------------------------------------------------------
/development/docker_ci/Dockerfile:
--------------------------------------------------------------------------------
1 | #
2 | # Docker image for variant tools
3 | #
4 | FROM continuumio/miniconda3
5 |
6 | # MAINTAINER is deprecated; a LABEL carries the same information.
7 | LABEL maintainer="Bo Peng"
8 |
9 | # Refresh the package index and install in the same layer, so that a cached
10 | # `apt-get update` layer is never combined with a later install step.
11 | RUN apt-get update && \
12 |     apt-get -y install swig gcc g++ build-essential bzip2 libbz2-dev libz-dev curl git vim libblas-dev liblapack-dev libcurl4-openssl-dev libssl-dev
13 |
14 |
15 | # -y answers conda's confirmation prompt explicitly; there is no terminal
16 | # to answer it during an image build.
17 | RUN conda update -y python
18 | RUN pip install numpy scipy tables cython
19 |
20 | RUN conda install -y -c conda-forge hdf5 blosc gsl libboost
21 | ENV LD_INCLUDE_PATH=/opt/conda/include/
22 |
23 |
24 | # Build and install variant tools from a fixed commit.
25 | WORKDIR /home/bpeng
26 | RUN git clone http://github.com/vatlab/VariantTools VariantTools
27 |
28 | WORKDIR /home/bpeng/VariantTools
29 | RUN git fetch
30 | RUN git checkout f74ee0c66e042f55d82c2a67d14c20e054e57597
31 | RUN python setup.py install
32 |
33 | ENV HOME=/home/bpeng
34 | RUN mkdir /home/bpeng/temp
35 |
36 | # download hg19 reference genome and refGene database
37 | # NOTE(review): the WORKDIR below is commented out, so the hg19 project is
38 | # created in /home/bpeng/VariantTools rather than in temp (unlike the hg18
39 | # section further down) - confirm this is intended.
40 | # WORKDIR /home/bpeng/temp
41 | RUN touch temp.vcf
42 | RUN vtools init test --build hg19
43 | RUN vtools import temp.vcf
44 | RUN vtools use refGene
45 |
46 | WORKDIR /home/bpeng
47 | RUN rm -rf temp
48 |
49 | RUN mkdir /home/bpeng/temp
50 |
51 | # download hg18 reference genome and refGene database
52 | WORKDIR /home/bpeng/temp
53 | RUN touch temp.vcf
54 | RUN vtools init test --build hg18
55 | RUN vtools import temp.vcf
56 | RUN vtools use refGene
57 |
58 | WORKDIR /home/bpeng
59 | RUN rm -rf temp
60 |
--------------------------------------------------------------------------------
/resources/annotation/CancerGeneCensus-20111215.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see <http://www.gnu.org/licenses/>
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 | #
7 | # To rebuild this database using a new version of data
8 | # 1. download an excel file from Cancer Genome Project: http://www.sanger.ac.uk/genetics/CGP/Census/
9 | # 2. save the data in a tab-delimited text file in filename CancerGeneCensus.txt. Remove the header.
10 | # 3. run vtools use CancerGeneCensus.ann --files CancerGeneCensus.txt
11 |
12 | [linked fields]
13 | *=GeneSymbol
14 |
15 | [data sources]
16 | description=Cancer Genome Project
17 | version=20111215
18 | encoding=ISO-8859-1
19 | anno_type=field
20 | direct_url=annoDB/CancerGeneCensus-20111215.DB.gz
21 | source_url=
22 | source_type=txt
23 |
24 | [GeneSymbol]
25 | index=1
26 | type=VARCHAR(255)
27 |
28 | [Name]
29 | index=2
30 | type=VARCHAR(255)
31 |
32 | [GeneID]
33 | index=3
34 | type=VARCHAR(255)
35 |
36 | [Chr]
37 | index=4
38 | type=VARCHAR(255)
39 |
40 | [ChrBand]
41 | index=5
42 | type=VARCHAR(255)
43 |
44 | [CancerSomaticMut]
45 | index=6
46 | type=VARCHAR(255)
47 |
48 | [CancerGermlineMut]
49 | index=7
50 | type=VARCHAR(255)
51 |
52 | [TumourTypesSomatic]
53 | index=8
54 | type=VARCHAR(255)
55 |
56 | [TumourTypesGermline]
57 | index=9
58 | type=VARCHAR(255)
59 |
60 | [CancerSyndrome]
61 | index=10
62 | type=VARCHAR(255)
63 |
64 | [TissueType]
65 | index=11
66 | type=VARCHAR(255)
67 |
68 | [CancerMolecularGenetics]
69 | index=12
70 | type=VARCHAR(255)
71 |
72 | [MutationType]
73 | index=13
74 | type=VARCHAR(255)
75 |
76 | [TranslocationPartner]
77 | index=14
78 | type=VARCHAR(255)
79 |
80 | [OtherGermlineMut]
81 | index=15
82 | type=VARCHAR(255)
83 |
84 | [OtherSyndromeOrDisease]
85 | index=16
86 | type=VARCHAR(255)
87 |
--------------------------------------------------------------------------------
/resources/annotation/CosmicCodingMuts-v61_260912.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New for
5 | # a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, pos, ref, alt
9 |
10 | [data sources]
11 | description=Cosmic coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358.
12 | version=v61_260912
13 | anno_type=variant
14 | direct_url=annoDB/CosmicCodingMuts-v61_260912.DB.gz
15 | source_url=ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCodingMuts_v61_260912.vcf.gz
16 | source_type=txt
17 | source_pattern=
18 |
19 | [chr]
20 | index=1
21 | type=VARCHAR(20)
22 | comment=Chromosome
23 |
24 | [pos]
25 | index=2
26 | type=INTEGER NOT NULL
27 | comment=1-based position
28 |
29 | [COSMIC_ID]
30 | index=3
31 | type=VARCHAR(48)
32 | comment=cosmic id of mutation
33 |
34 | [ref]
35 | index=4
36 | type=VARCHAR(255)
37 | comment=Reference allele, '-' for insertion.
38 |
39 | [alt]
40 | index=5
41 | adj=CheckSplit()
42 | type=VARCHAR(255)
43 | comment=Alternative allele, '-' for deletion.
44 |
45 | [gene]
46 | index=8
47 | type=VARCHAR(255)
48 | adj=ExtractValue('GENE=', ';')
49 | comment=genename
50 |
51 | [strand]
52 | index=8
53 | adj=ExtractValue('STRAND=', ';')
54 | type=VARCHAR(255)
55 | comment=strand
56 |
57 | [CDS]
58 | index=8
59 | adj=ExtractValue('CDS=', ';')
60 | type=VARCHAR(255)
61 | comment=CDS annotation
62 |
63 | [AA]
64 | index=8
65 | adj=ExtractValue('AA=', ';')
66 | type=VARCHAR(255)
67 | comment=Peptide annotation
68 |
69 | [CNT]
70 | index=8
71 | adj=ExtractValue('CNT=', ';')
72 | type=INT
73 | comment=Number of samples with this mutation
74 |
--------------------------------------------------------------------------------
/resources/annotation/CosmicCodingMuts-v67_20131024.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New for
5 | # a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, pos, ref, alt
9 |
10 | [data sources]
11 | description=Cosmic coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358.
12 | version=v67_20131024
13 | anno_type=variant
14 | source_url=ftp://ngs.sanger.ac.uk/production/cosmic/CosmicCodingMuts_v67_20131024.vcf.gz
15 | direct_url=annoDB/CosmicCodingMuts-v67_20131024.DB.gz ad998078fab6ee4c225ab438f75f2360
16 | source_type=txt
17 | source_pattern=
18 |
19 | [chr]
20 | index=1
21 | type=VARCHAR(20)
22 | comment=Chromosome
23 |
24 | [pos]
25 | index=2
26 | type=INTEGER NOT NULL
27 | comment=1-based position
28 |
29 | [COSMIC_ID]
30 | index=3
31 | type=VARCHAR(48)
32 | comment=cosmic id of mutation
33 |
34 | [ref]
35 | index=4
36 | type=VARCHAR(255)
37 | comment=Reference allele, '-' for insertion.
38 |
39 | [alt]
40 | index=5
41 | adj=CheckSplit()
42 | type=VARCHAR(255)
43 | comment=Alternative allele, '-' for deletion.
44 |
45 | [gene]
46 | index=8
47 | type=VARCHAR(255)
48 | adj=ExtractValue('GENE=', ';')
49 | comment=genename
50 |
51 | [strand]
52 | index=8
53 | adj=ExtractValue('STRAND=', ';')
54 | type=VARCHAR(255)
55 | comment=strand
56 |
57 | [CDS]
58 | index=8
59 | adj=ExtractValue('CDS=', ';')
60 | type=VARCHAR(255)
61 | comment=CDS annotation
62 |
63 | [AA]
64 | index=8
65 | adj=ExtractValue('AA=', ';')
66 | type=VARCHAR(255)
67 | comment=Peptide annotation
68 |
69 | [CNT]
70 | index=8
71 | adj=ExtractValue('CNT=', ';')
72 | type=INT
73 | comment=Number of samples with this mutation
74 |
--------------------------------------------------------------------------------
/resources/annotation/CosmicNonCodingVariants-v61_260912.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New for
5 | # a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, pos, ref, alt
9 |
10 | [data sources]
11 | description=Cosmic non-coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358.
12 | version=v61_260912
13 | anno_type=variant
14 | direct_url=annoDB/CosmicNonCodingVariants-v61_260912.DB.gz
15 | source_url=ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicNonCodingVariants_v61_260912.vcf.gz
16 | source_type=txt
17 | source_pattern=
18 |
19 | [chr]
20 | index=1
21 | type=VARCHAR(20)
22 | comment=Chromosome
23 |
24 | [pos]
25 | index=2
26 | type=INTEGER NOT NULL
27 | comment=1-based position
28 |
29 | [COSMIC_ID]
30 | index=3
31 | type=VARCHAR(48)
32 | comment=cosmic id of mutation
33 |
34 | [ref]
35 | index=4
36 | type=VARCHAR(255)
37 | comment=Reference allele, '-' for insertion.
38 |
39 | [alt]
40 | index=5
41 | adj=CheckSplit()
42 | type=VARCHAR(255)
43 | comment=Alternative allele, '-' for deletion.
44 |
45 | [gene]
46 | index=8
47 | type=VARCHAR(255)
48 | adj=ExtractValue('GENE=', ';')
49 | comment=genename
50 |
51 | [strand]
52 | index=8
53 | adj=ExtractValue('STRAND=', ';')
54 | type=VARCHAR(255)
55 | comment=strand
56 |
--------------------------------------------------------------------------------
/resources/annotation/CosmicNonCodingVariants-v67_241013.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New for
5 | # a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, pos, ref, alt
9 |
10 | [data sources]
11 | description=Cosmic non-coding mutation database. This data contains mutations affecting 10 or less nucleotides in REF. The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mutations In Cancer web site, http://www.sanger.ac.uk/cosmic. Bamford et al (2004). The COSMIC (Catalogue of Somatic Mutations in Cancer) database and website. Br J Cancer, 91,355-358.
12 | version=v67_241013
13 | anno_type=variant
14 | source_url=ftp://ngs.sanger.ac.uk/production/cosmic/CosmicNonCodingVariants_v67_20131024.vcf.gz
15 | direct_url=annoDB/CosmicNonCodingVariants-v67_241013.DB.gz 07366d9d5ba0cd79e03893263d31b7ea
16 | source_type=txt
17 | source_pattern=
18 |
19 | [chr]
20 | index=1
21 | type=VARCHAR(20)
22 | comment=Chromosome
23 |
24 | [pos]
25 | index=2
26 | type=INTEGER NOT NULL
27 | comment=1-based position
28 |
29 | [COSMIC_ID]
30 | index=3
31 | type=VARCHAR(48)
32 | comment=cosmic id of mutation
33 |
34 | [ref]
35 | index=4
36 | type=VARCHAR(255)
37 | comment=Reference allele, '-' for insertion.
38 |
39 | [alt]
40 | index=5
41 | adj=CheckSplit()
42 | type=VARCHAR(255)
43 | comment=Alternative allele, '-' for deletion.
44 |
45 | [gene]
46 | index=8
47 | type=VARCHAR(255)
48 | adj=ExtractValue('GENE=', ';')
49 | comment=genename
50 |
51 | [strand]
52 | index=8
53 | adj=ExtractValue('STRAND=', ';')
54 | type=VARCHAR(255)
55 | comment=strand
56 |
--------------------------------------------------------------------------------
/resources/annotation/DGV-hg18_20130723.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg18=chr,start,end
9 |
10 | [data sources]
11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation.
12 | anno_type=range
13 | header=1
14 | version=hg18_20130723
15 | source_url=http://dgv.tcag.ca/dgv/docs/NCBI36_hg18_variants_2013-07-23.txt
16 | direct_url=annoDB/DGV-hg18_20130723.DB.gz
17 | source_type=txt
18 | delimiter="\t"
19 |
20 | [variantaccession]
21 | index=1
22 | type=VARCHAR(10)
23 |
24 | [chr]
25 | index=2
26 | type=VARCHAR(48)
27 |
28 | [start]
29 | index=3
30 | type=INT
31 |
32 | [end]
33 | index=4
34 | type=INT
35 |
36 | [varianttype]
37 | index=5
38 | type=VARCHAR(3)
39 |
40 | [variantsubtype]
41 | index=6
42 | type=VARCHAR(11)
43 |
44 | [reference]
45 | index=7
46 | type=VARCHAR(31)
47 |
48 | [pubmedid]
49 | index=8
50 | type=INT
51 |
52 | [method]
53 | index=9
54 | type=VARCHAR(67)
55 |
56 | [platform]
57 | index=10
58 | type=VARCHAR(181)
59 |
60 | [mergedvariants]
61 | index=11
62 | type=VARCHAR(255)
63 |
64 | [supportingvariants]
65 | index=12
66 | type=VARCHAR(1144)
67 |
68 | [mergedorsample]
69 | index=13
70 | type=VARCHAR(1)
71 |
72 | [frequency]
73 | index=14
74 | type=VARCHAR(255)
75 |
76 | [samplesize]
77 | index=15
78 | type=INT
79 |
80 | [observedgains]
81 | index=16
82 | type=INT
83 |
84 | [observedlosses]
85 | index=17
86 | type=INT
87 |
88 | [cohortdescription]
89 | index=18
90 | type=VARCHAR(69)
91 |
92 | [genes]
93 | index=19
94 | type=VARCHAR(412)
95 |
96 | [samples]
97 | index=20
98 | type=VARCHAR(951)
99 |
--------------------------------------------------------------------------------
/resources/annotation/DGV-hg19_20130723.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr,start,end
9 |
10 | [data sources]
11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation.
12 | anno_type=range
13 | header=1
14 | version=hg19_20130723
15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh37_hg19_variants_2013-07-23.txt
16 | direct_url=annoDB/DGV-hg19_20130723.DB.gz a9c4f9f23ce4595d9c33b0a499273d53
17 | source_type=txt
18 | delimiter="\t"
19 |
20 | [variantaccession]
21 | index=1
22 | type=VARCHAR(10)
23 |
24 | [chr]
25 | index=2
26 | type=VARCHAR(48)
27 |
28 | [start]
29 | index=3
30 | type=INT
31 |
32 | [end]
33 | index=4
34 | type=INT
35 |
36 | [varianttype]
37 | index=5
38 | type=VARCHAR(3)
39 |
40 | [variantsubtype]
41 | index=6
42 | type=VARCHAR(11)
43 |
44 | [reference]
45 | index=7
46 | type=VARCHAR(31)
47 |
48 | [pubmedid]
49 | index=8
50 | type=INT
51 |
52 | [method]
53 | index=9
54 | type=VARCHAR(67)
55 |
56 | [platform]
57 | index=10
58 | type=VARCHAR(181)
59 |
60 | [mergedvariants]
61 | index=11
62 | type=VARCHAR(255)
63 |
64 | [supportingvariants]
65 | index=12
66 | type=VARCHAR(1144)
67 |
68 | [mergedorsample]
69 | index=13
70 | type=VARCHAR(1)
71 |
72 | [frequency]
73 | index=14
74 | type=VARCHAR(255)
75 |
76 | [samplesize]
77 | index=15
78 | type=INT
79 |
80 | [observedgains]
81 | index=16
82 | type=INT
83 |
84 | [observedlosses]
85 | index=17
86 | type=INT
87 |
88 | [cohortdescription]
89 | index=18
90 | type=VARCHAR(69)
91 |
92 | [genes]
93 | index=19
94 | type=VARCHAR(412)
95 |
96 | [samples]
97 | index=20
98 | type=VARCHAR(951)
99 |
--------------------------------------------------------------------------------
/resources/annotation/DGV-hg19_20160515.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) Man Chong Leong 2017 (henryleong@rice.edu)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr,start,end
9 |
10 | [data sources]
11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation.
12 | anno_type=range
13 | header=1
14 | version=hg19_20160515
15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh37_hg19_variants_2016-05-15.txt
16 | #direct_url=annoDB/DGV-hg19_20160515.DB.gz a9c4f9f23ce4595d9c33b0a499273d53  (disabled: this checksum is the one listed for DGV-hg19_20130723 -- recompute before enabling)
17 | source_type=txt
18 | delimiter="\t"
19 |
20 | [variantaccession]
21 | index=1
22 | type=VARCHAR(10)
23 |
24 | [chr]
25 | index=2
26 | type=VARCHAR(48)
27 |
28 | [start]
29 | index=3
30 | type=INT
31 |
32 | [end]
33 | index=4
34 | type=INT
35 |
36 | [varianttype]
37 | index=5
38 | type=VARCHAR(3)
39 |
40 | [variantsubtype]
41 | index=6
42 | type=VARCHAR(11)
43 |
44 | [reference]
45 | index=7
46 | type=VARCHAR(31)
47 |
48 | [pubmedid]
49 | index=8
50 | type=INT
51 |
52 | [method]
53 | index=9
54 | type=VARCHAR(67)
55 |
56 | [platform]
57 | index=10
58 | type=VARCHAR(181)
59 |
60 | [mergedvariants]
61 | index=11
62 | type=VARCHAR(255)
63 |
64 | [supportingvariants]
65 | index=12
66 | type=VARCHAR(1144)
67 |
68 | [mergedorsample]
69 | index=13
70 | type=VARCHAR(1)
71 |
72 | [frequency]
73 | index=14
74 | type=VARCHAR(255)
75 |
76 | [samplesize]
77 | index=15
78 | type=INT
79 |
80 | [observedgains]
81 | index=16
82 | type=INT
83 |
84 | [observedlosses]
85 | index=17
86 | type=INT
87 |
88 | [cohortdescription]
89 | index=18
90 | type=VARCHAR(69)
91 |
92 | [genes]
93 | index=19
94 | type=VARCHAR(412)
95 |
96 | [samples]
97 | index=20
98 | type=VARCHAR(951)
99 |
--------------------------------------------------------------------------------
/resources/annotation/DGV-hg19_20160831.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) Man Chong Leong 2017 (henryleong@rice.edu)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg38=chr,start,end
9 |
10 | [data sources]
11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation.
12 | anno_type=range
13 | header=1
14 | version=hg38_20160831
15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh38_hg38_variants_2016-08-31.txt
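16 | #direct_url=annoDB/DGV-hg38_20160515.DB.gz a9c4f9f23ce4595d9c33b0a499273d53  (disabled: name does not match version hg38_20160831 and the checksum is the one listed for DGV-hg19_20130723; NOTE(review): this file is named hg19 but links and sources hg38 -- confirm intent)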
17 | source_type=txt
18 | delimiter="\t"
19 |
20 | [variantaccession]
21 | index=1
22 | type=VARCHAR(10)
23 |
24 | [chr]
25 | index=2
26 | type=VARCHAR(48)
27 |
28 | [start]
29 | index=3
30 | type=INT
31 |
32 | [end]
33 | index=4
34 | type=INT
35 |
36 | [varianttype]
37 | index=5
38 | type=VARCHAR(3)
39 |
40 | [variantsubtype]
41 | index=6
42 | type=VARCHAR(11)
43 |
44 | [reference]
45 | index=7
46 | type=VARCHAR(31)
47 |
48 | [pubmedid]
49 | index=8
50 | type=INT
51 |
52 | [method]
53 | index=9
54 | type=VARCHAR(67)
55 |
56 | [platform]
57 | index=10
58 | type=VARCHAR(181)
59 |
60 | [mergedvariants]
61 | index=11
62 | type=VARCHAR(255)
63 |
64 | [supportingvariants]
65 | index=12
66 | type=VARCHAR(1144)
67 |
68 | [mergedorsample]
69 | index=13
70 | type=VARCHAR(1)
71 |
72 | [frequency]
73 | index=14
74 | type=VARCHAR(255)
75 |
76 | [samplesize]
77 | index=15
78 | type=INT
79 |
80 | [observedgains]
81 | index=16
82 | type=INT
83 |
84 | [observedlosses]
85 | index=17
86 | type=INT
87 |
88 | [cohortdescription]
89 | index=18
90 | type=VARCHAR(69)
91 |
92 | [genes]
93 | index=19
94 | type=VARCHAR(412)
95 |
96 | [samples]
97 | index=20
98 | type=VARCHAR(951)
99 |
--------------------------------------------------------------------------------
/resources/annotation/DGV-hg38_20160831.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) Man Chong Leong 2017 (henryleong@rice.edu)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg38=chr,start,end
9 |
10 | [data sources]
11 | description=Database of Genomic Variants, a curated catalogue of human genomic structural variation.
12 | anno_type=range
13 | header=1
14 | version=hg38_20160831
15 | source_url=http://dgv.tcag.ca/dgv/docs/GRCh38_hg38_variants_2016-08-31.txt
16 | direct_url=annoDB/DGV-hg38_20160831.DB.gz b2183622dfc3cbbcf7d8640893dbf9fd
17 | source_type=txt
18 | delimiter="\t"
19 |
20 | [variantaccession]
21 | index=1
22 | type=VARCHAR(10)
23 |
24 | [chr]
25 | index=2
26 | type=VARCHAR(48)
27 |
28 | [start]
29 | index=3
30 | type=INT
31 |
32 | [end]
33 | index=4
34 | type=INT
35 |
36 | [varianttype]
37 | index=5
38 | type=VARCHAR(3)
39 |
40 | [variantsubtype]
41 | index=6
42 | type=VARCHAR(11)
43 |
44 | [reference]
45 | index=7
46 | type=VARCHAR(31)
47 |
48 | [pubmedid]
49 | index=8
50 | type=INT
51 |
52 | [method]
53 | index=9
54 | type=VARCHAR(67)
55 |
56 | [platform]
57 | index=10
58 | type=VARCHAR(181)
59 |
60 | [mergedvariants]
61 | index=11
62 | type=VARCHAR(255)
63 |
64 | [supportingvariants]
65 | index=12
66 | type=VARCHAR(1144)
67 |
68 | [mergedorsample]
69 | index=13
70 | type=VARCHAR(1)
71 |
72 | [frequency]
73 | index=14
74 | type=VARCHAR(255)
75 |
76 | [samplesize]
77 | index=15
78 | type=INT
79 |
80 | [observedgains]
81 | index=16
82 | type=INT NULL
83 | adj=Nullify(' ')
84 |
85 | [observedlosses]
86 | index=17
87 | type=INT NULL
88 | adj=Nullify(' ')
89 |
90 | [cohortdescription]
91 | index=18
92 | type=VARCHAR(69)
93 |
94 | [genes]
95 | index=19
96 | type=VARCHAR(412)
97 |
98 | [samples]
99 | index=20
100 | type=VARCHAR(951)
101 |
--------------------------------------------------------------------------------
/resources/annotation/Illumina_NRCE-20130307.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=Chromosome,Start,End
9 |
10 | [data sources]
11 | anno_type=range
12 | description=This annotation database contains expanded exome targeted regions covered by
13 | the Nextera Rapid Capture Expanded platform from Illumina.
14 | version=20130307
15 | source_url=http://supportres.illumina.com/documents/documentation/chemistry_documentation/samplepreps_nextera/nexterarapidcapture/nexterarapidcapture_expandedexome_targetedregions.txt
16 | direct_url=annoDB/Illumina_NRCE-20130307.DB.gz b6480f4d89cf763924a7d22207c89057
17 | header=7
18 | source_type=txt
19 |
20 | [Name]
21 | index=1
22 | type=VARCHAR(48)
23 | comment=Name of region
24 |
25 | [Chromosome]
26 | index=2
27 | adj=RemoveLeading('chr')
28 | type=VARCHAR(20)
29 |
30 | [Start]
31 | index=3
32 | type=INTEGER
33 | adj=IncreaseBy(1)
34 | comment=Start position of the targeted region
35 |
36 | [End]
37 | index=4
38 | type=INTEGER
39 | comment=End position of the targeted region
40 |
41 | # the source file has two additional columns Probe Length and Downstream probe length, but
42 | # they are all zero as far as I can tell.
43 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene-hg19_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=CCDS Genes
13 | version=hg19_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
15 | direct_url=annoDB/ccdsGene-hg19_20110909.DB.gz
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name (usually a CCDS transcript ID)
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=5
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=6
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=7
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=8
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=9
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
60 | [score]
61 | index=12
62 | type=INTEGER NULL
63 | comment=Score
64 |
65 | [name2]
66 | index=13
67 | type=VARCHAR(255)
68 | comment=Alternate name
69 |
70 | [cdsStartStat]
71 | index=14
72 | type=VARCHAR(10)
73 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
74 |
75 | [cdsEndStat]
76 | index=15
77 | type=VARCHAR(10)
78 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
79 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene-hg19_20111206.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, cdsStart, cdsEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=CCDS Genes
13 | version=hg19_20111206
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
15 | direct_url=annoDB/ccdsGene-hg19_20111206.DB.gz
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name (usually a CCDS transcript ID)
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [cdsStart]
34 | index=7
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Coding region start
38 |
39 | [cdsEnd]
40 | index=8
41 | type=INTEGER
42 | comment=Coding region end
43 |
44 | [exonCount]
45 | index=9
46 | type=INTEGER NULL
47 | comment=Number of exons
48 |
49 | [score]
50 | index=12
51 | type=INTEGER NULL
52 | comment=Score
53 |
54 | [name2]
55 | index=13
56 | type=VARCHAR(255)
57 | comment=Alternate name
58 |
59 | [cdsStartStat]
60 | index=14
61 | type=VARCHAR(10)
62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
63 |
64 | [cdsEndStat]
65 | index=15
66 | type=VARCHAR(10)
67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
68 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene-hg19_20130904.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, cdsStart, cdsEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=High-confidence human gene annotations from the Consensus Coding Sequence (CCDS) project.
13 | version=hg19_20130904
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
15 | direct_url=annoDB/ccdsGene-hg19_20130904.DB.gz 50f2d2e271c7c43beba1b2175ddf62a8
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name (usually a CCDS transcript ID)
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [cdsStart]
34 | index=7
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Coding region start
38 |
39 | [cdsEnd]
40 | index=8
41 | type=INTEGER
42 | comment=Coding region end
43 |
44 | [exonCount]
45 | index=9
46 | type=INTEGER NULL
47 | comment=Number of exons
48 |
49 | [score]
50 | index=12
51 | type=INTEGER NULL
52 | comment=Score
53 |
54 | [name2]
55 | index=13
56 | type=VARCHAR(255)
57 | comment=Alternate name
58 |
59 | [cdsStartStat]
60 | index=14
61 | type=VARCHAR(10)
62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
63 |
64 | [cdsEndStat]
65 | index=15
66 | type=VARCHAR(10)
67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
68 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene-hg38_20171008.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg38=chr, cdsStart, cdsEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=High-confidence human gene annotations from the Consensus Coding Sequence (CCDS) project.
13 | version=hg38_20171008
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/ccdsGene.txt.gz
15 | direct_url=annoDB/ccdsGene-hg38_20171008.DB.gz 56366edb79a9c2ccc73036ce2a3146e7
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name (usually a CCDS transcript ID)
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [cdsStart]
34 | index=7
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Coding region start
38 |
39 | [cdsEnd]
40 | index=8
41 | type=INTEGER
42 | comment=Coding region end
43 |
44 | [exonCount]
45 | index=9
46 | type=INTEGER NULL
47 | comment=Number of exons
48 |
49 | [score]
50 | index=12
51 | type=INTEGER NULL
52 | comment=Score
53 |
54 | [name2]
55 | index=13
56 | type=VARCHAR(255)
57 | comment=Alternate name
58 |
59 | [cdsStartStat]
60 | index=14
61 | type=VARCHAR(10)
62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
63 |
64 | [cdsEndStat]
65 | index=15
66 | type=VARCHAR(10)
67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
68 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene_exon-hg19_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 |
8 | [linked fields]
9 | hg19=chr, exon_start, exon_end
10 |
11 | [data sources]
12 | anno_type=range
13 | description=CCDS exons
14 | version=hg19_20110909
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
16 | direct_url=annoDB/ccdsGene_exon-hg19_20110909.DB.gz
17 | source_type=txt
18 |
19 | [name]
20 | index=2
21 | type=VARCHAR(255)
22 | comment=CCDS gene name
23 |
24 | [chr]
25 | index=3
26 | adj=RemoveLeading('chr')
27 | type=VARCHAR(20)
28 |
29 | [strand]
30 | index=4
31 | type=CHAR(1) NULL
32 | comment=which DNA strand contains the observed alleles
33 |
34 | [txStart]
35 | index=5
36 | type=INTEGER
37 | adj=IncreaseBy(1)
38 | comment=Transcription start position
39 |
40 | [txEnd]
41 | index=6
42 | type=INTEGER
43 | comment=Transcription end position
44 |
45 | [cdsStart]
46 | index=7
47 | type=INTEGER
48 | adj=IncreaseBy(1)
49 | comment=Coding region start
50 |
51 | [cdsEnd]
52 | index=8
53 | type=INTEGER
54 | comment=Coding region end
55 |
56 | [exonCount]
57 | index=9
58 | type=INTEGER NULL
59 | comment=Number of exons
60 |
61 | [exon_start]
62 | index=10
63 | adj=SplitField(','), IncreaseBy(1)
64 | type=INTEGER NOT NULL
65 | comment=exon start position
66 |
67 | [exon_end]
68 | index=11
69 | adj=SplitField(',')
70 | type=INTEGER NOT NULL
71 | comment=exon end position
72 |
73 | [score]
74 | index=12
75 | type=INTEGER NULL
76 | comment=Score
77 |
78 | [name2]
79 | index=13
80 | type=VARCHAR(255)
81 | comment=Alternative name
82 |
83 | [cdsStartStat]
84 | index=14
85 | type=VARCHAR(10)
86 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
87 |
88 | [cdsEndStat]
89 | index=15
90 | type=VARCHAR(10)
91 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
92 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene_exon-hg19_20111206.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 |
8 | [linked fields]
9 | hg19=chr, exon_start, exon_end
10 |
11 | [data sources]
12 | anno_type=range
13 | description=CCDS exons
14 | version=hg19_20111206
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
16 | direct_url=annoDB/ccdsGene_exon-hg19_20111206.DB.gz
17 | source_type=txt
18 |
19 | [name]
20 | index=2
21 | type=VARCHAR(255)
22 | comment=CCDS gene name
23 |
24 | [chr]
25 | index=3
26 | adj=RemoveLeading('chr')
27 | type=VARCHAR(20)
28 |
29 | [strand]
30 | index=4
31 | type=CHAR(1) NULL
32 | comment=which DNA strand contains the observed alleles
33 |
34 | [cdsStart]
35 | index=7
36 | type=INTEGER
37 | adj=IncreaseBy(1)
38 | comment=Coding region start
39 |
40 | [cdsEnd]
41 | index=8
42 | type=INTEGER
43 | comment=Coding region end
44 |
45 | [exonCount]
46 | index=9
47 | type=INTEGER NULL
48 | comment=Number of exons
49 |
50 | [exon_start]
51 | index=10
52 | adj=SplitField(','), IncreaseBy(1)
53 | type=INTEGER NOT NULL
54 | comment=exon start position
55 |
56 | [exon_end]
57 | index=11
58 | adj=SplitField(',')
59 | type=INTEGER NOT NULL
60 | comment=exon end position
61 |
62 | [score]
63 | index=12
64 | type=INTEGER NULL
65 | comment=Score
66 |
67 | [name2]
68 | index=13
69 | type=VARCHAR(255)
70 | comment=Alternative name
71 |
72 | [cdsStartStat]
73 | index=14
74 | type=VARCHAR(10)
75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
76 |
77 | [cdsEndStat]
78 | index=15
79 | type=VARCHAR(10)
80 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
81 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene_exon-hg19_20130904.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under the GPL; see http://www.gnu.org/licenses/ for details.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 |
8 | [linked fields]
9 | hg19=chr, exon_start, exon_end
10 |
11 | [data sources]
12 | anno_type=range
13 | description=High-confidence human gene annotations from the Consensus Coding Sequence (CCDS) project. This database contains all exon regions of the CCDS genes.
14 | version=hg19_20130904
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
16 | direct_url=annoDB/ccdsGene_exon-hg19_20130904.DB.gz 0e903b09c1c2bcd9f636d9477874dd82
17 | source_type=txt
18 |
19 | [name]
20 | index=2
21 | type=VARCHAR(255)
22 | comment=CCDS gene name
23 |
24 | [chr]
25 | index=3
26 | adj=RemoveLeading('chr')
27 | type=VARCHAR(20)
28 |
29 | [strand]
30 | index=4
31 | type=CHAR(1) NULL
32 | comment=which DNA strand contains the observed alleles
33 |
34 | [cdsStart]
35 | index=7
36 | type=INTEGER
37 | adj=IncreaseBy(1)
38 | comment=Coding region start
39 |
40 | [cdsEnd]
41 | index=8
42 | type=INTEGER
43 | comment=Coding region end
44 |
45 | [exonCount]
46 | index=9
47 | type=INTEGER NULL
48 | comment=Number of exons
49 |
50 | [exon_start]
51 | index=10
52 | adj=SplitField(','), IncreaseBy(1)
53 | type=INTEGER NOT NULL
54 | comment=exon start position
55 |
56 | [exon_end]
57 | index=11
58 | adj=SplitField(',')
59 | type=INTEGER NOT NULL
60 | comment=exon end position
61 |
62 | [score]
63 | index=12
64 | type=INTEGER NULL
65 | comment=Score
66 |
67 | [name2]
68 | index=13
69 | type=VARCHAR(255)
70 | comment=Alternative name
71 |
72 | [cdsStartStat]
73 | index=14
74 | type=VARCHAR(10)
75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
76 |
77 | [cdsEndStat]
78 | index=15
79 | type=VARCHAR(10)
80 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
81 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene_exon-hg38_20171008.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 |
8 | [linked fields]
9 | hg38=chr, exon_start, exon_end
10 |
11 | [data sources]
12 | anno_type=range
13 | description=CCDS exons
14 | version=hg38_20171008
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/ccdsGene.txt.gz
16 | direct_url=annoDB/ccdsGene_exon-hg38_20171008.DB.gz 293920c679221903b3e69031256d1432
17 | source_type=txt
18 |
19 | [name]
20 | index=2
21 | type=VARCHAR(255)
22 | comment=CCDS gene name
23 |
24 | [chr]
25 | index=3
26 | adj=RemoveLeading('chr')
27 | type=VARCHAR(20)
28 |
29 | [strand]
30 | index=4
31 | type=CHAR(1) NULL
32 | comment=which DNA strand contains the observed alleles
33 |
34 | [cdsStart]
35 | index=7
36 | type=INTEGER
37 | adj=IncreaseBy(1)
38 | comment=Coding region start
39 |
40 | [cdsEnd]
41 | index=8
42 | type=INTEGER
43 | comment=Coding region end
44 |
45 | [exonCount]
46 | index=9
47 | type=INTEGER NULL
48 | comment=Number of exons
49 |
50 | [exon_start]
51 | index=10
52 | adj=SplitField(','), IncreaseBy(1)
53 | type=INTEGER NOT NULL
54 | comment=exon start position
55 |
56 | [exon_end]
57 | index=11
58 | adj=SplitField(',')
59 | type=INTEGER NOT NULL
60 | comment=exon end position
61 |
62 | [score]
63 | index=12
64 | type=INTEGER NULL
65 | comment=Score
66 |
67 | [name2]
68 | index=13
69 | type=VARCHAR(255)
70 | comment=Alternative name
71 |
72 | [cdsStartStat]
73 | index=14
74 | type=VARCHAR(10)
75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
76 |
77 | [cdsEndStat]
78 | index=15
79 | type=VARCHAR(10)
80 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
81 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene_exon_hg19-20111206.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 |
8 | [linked fields]
9 | hg19=chr, exon_start, exon_end
10 |
11 | [data sources]
12 | anno_type=range
13 | description=CCDS exons
14 | version=hg19_20111206
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
16 | direct_url=annoDB/ccdsGene_exon-hg19_20111206.DB.gz
17 | source_type=txt
18 |
19 | [name]
20 | index=2
21 | type=VARCHAR(255)
22 | comment=CCDS gene name
23 |
24 | [chr]
25 | index=3
26 | adj=RemoveLeading('chr')
27 | type=VARCHAR(20)
28 |
29 | [strand]
30 | index=4
31 | type=CHAR(1) NULL
32 | comment=which DNA strand contains the observed alleles
33 |
34 | [cdsStart]
35 | index=7
36 | type=INTEGER
37 | adj=IncreaseBy(1)
38 | comment=Coding region start
39 |
40 | [cdsEnd]
41 | index=8
42 | type=INTEGER
43 | comment=Coding region end
44 |
45 | [exonCount]
46 | index=9
47 | type=INTEGER NULL
48 | comment=Number of exons
49 |
50 | [exon_start]
51 | index=10
52 | adj=SplitField(','), IncreaseBy(1)
53 | type=INTEGER NOT NULL
54 | comment=exon start position
55 |
56 | [exon_end]
57 | index=11
58 | adj=SplitField(',')
59 | type=INTEGER NOT NULL
60 | comment=exon end position
61 |
62 | [score]
63 | index=12
64 | type=INTEGER NULL
65 | comment=Score
66 |
67 | [name2]
68 | index=13
69 | type=VARCHAR(255)
70 | comment=Alternative name
71 |
72 | [cdsStartStat]
73 | index=14
74 | type=VARCHAR(10)
75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
76 |
77 | [cdsEndStat]
78 | index=15
79 | type=VARCHAR(10)
80 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
81 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene_exon_hg38-20171008.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 |
8 | [linked fields]
9 | hg38=chr, exon_start, exon_end
10 |
11 | [data sources]
12 | anno_type=range
13 | description=CCDS exons
14 | version=hg38_20171008
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/ccdsGene.txt.gz
16 | #direct_url=annoDB/ccdsGene_exon-hg19_20111206.DB.gz
17 | source_type=txt
18 |
19 | [name]
20 | index=2
21 | type=VARCHAR(255)
22 | comment=CCDS gene name
23 |
24 | [chr]
25 | index=3
26 | adj=RemoveLeading('chr')
27 | type=VARCHAR(20)
28 |
29 | [strand]
30 | index=4
31 | type=CHAR(1) NULL
32 | comment=which DNA strand contains the observed alleles
33 |
34 | [cdsStart]
35 | index=7
36 | type=INTEGER
37 | adj=IncreaseBy(1)
38 | comment=Coding region start
39 |
40 | [cdsEnd]
41 | index=8
42 | type=INTEGER
43 | comment=Coding region end
44 |
45 | [exonCount]
46 | index=9
47 | type=INTEGER NULL
48 | comment=Number of exons
49 |
50 | [exon_start]
51 | index=10
52 | adj=SplitField(','), IncreaseBy(1)
53 | type=INTEGER NOT NULL
54 | comment=exon start position
55 |
56 | [exon_end]
57 | index=11
58 | adj=SplitField(',')
59 | type=INTEGER NOT NULL
60 | comment=exon end position
61 |
62 | [score]
63 | index=12
64 | type=INTEGER NULL
65 | comment=Score
66 |
67 | [name2]
68 | index=13
69 | type=VARCHAR(255)
70 | comment=Alternative name
71 |
72 | [cdsStartStat]
73 | index=14
74 | type=VARCHAR(10)
75 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
76 |
77 | [cdsEndStat]
78 | index=15
79 | type=VARCHAR(10)
80 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
81 |
--------------------------------------------------------------------------------
/resources/annotation/ccdsGene_hg19-20111206.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, cdsStart, cdsEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=CCDS Genes
13 | version=hg19_20111206
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ccdsGene.txt.gz
15 | direct_url=annoDB/ccdsGene-hg19_20111206.DB.gz
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name (usually a CCDS transcript ID)
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [cdsStart]
34 | index=7
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Coding region start
38 |
39 | [cdsEnd]
40 | index=8
41 | type=INTEGER
42 | comment=Coding region end
43 |
44 | [exonCount]
45 | index=9
46 | type=INTEGER NULL
47 | comment=Number of exons
48 |
49 | [score]
50 | index=12
51 | type=INTEGER NULL
52 | comment=Score
53 |
54 | [name2]
55 | index=13
56 | type=VARCHAR(255)
57 | comment=Alternate name
58 |
59 | [cdsStartStat]
60 | index=14
61 | type=VARCHAR(10)
62 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
63 |
64 | [cdsEndStat]
65 | index=15
66 | type=VARCHAR(10)
67 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
68 |
--------------------------------------------------------------------------------
/resources/annotation/cytoBand-hg18_20111216.ann:
--------------------------------------------------------------------------------
1 | #Variant tools schema version 1.0
2 | #
3 | # $File: cytoBand.ann $
4 | #
5 | # This file is part of variant_tools, a software application to annotate,
6 | # summarize, and filter variants for next-gen sequencing analysis.
7 | # Please visit http://varianttools.sourceforge.net for details.
8 | #
9 | # Copyright (C) 2004 - 2010 Bo Peng (bpeng@mdanderson.org)
10 | #
11 | # This program is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with this program. If not, see http://www.gnu.org/licenses/.
23 | #
24 | # Please refer to http://varianttools.sourceforge.net/Annotation/New for
25 | # a description of the format of this file.
26 | #
27 | #
28 |
29 | [linked fields]
30 | hg18=chr, begin, end
31 |
32 | [data sources]
33 | anno_type=range
34 | description=Cyto Band
35 | version=hg18_20111216
36 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/cytoBand.txt.gz
37 | direct_url=annoDB/cytoBand-hg18_20111216.DB.gz
38 | source_type=txt
39 |
40 | [chr]
41 | index=1
42 | adj=RemoveLeading('chr')
43 | type=chromosome
44 |
45 | [begin]
46 | index=2
47 | type=INTEGER
48 | adj=IncreaseBy(1)
49 | comment=start position on chromosome
50 |
51 | [end]
52 | index=3
53 | type=INTEGER
54 | comment=end position on chromosome
55 |
56 | [name]
57 | index=1,4
58 | adj=lambda x: (x[0][3:] + x[1]) if x[0].startswith('chr') else (x[0] + x[1])
59 | type=VARCHAR(255)
60 | comment=name of cytogenetic band
61 |
62 | [gieStain]
63 | index=5
64 | type=VARCHAR(255)
65 | comment=giemsa stain results
66 |
--------------------------------------------------------------------------------
/resources/annotation/keggPathway-20110823.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 | #
7 | # To retrieve the kegg pathway details, a database query was run using the
8 | # "hg19" MySQL database from UCSC. We have a local copy of the database which can be
9 | # accessed with a mysql client. Or one can use the public MySQL server directly at UCSC:
10 | #
11 | # mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A
12 | # (for help connecting, see: http://genome.ucsc.edu/FAQ/FAQdownloads#download29)
13 | #
14 | # Kegg Pathway Query: This will return three columns
15 | #
16 | # select distinct ckm.ccdsId ccds_id,
17 | # kmd.mapID kegg_pathway_id,
18 | # kmd.description kegg_pathway_description
19 | # from hg19.ccdsKgMap ckm
20 | # join hg19.keggPathway kp on ckm.geneId=kp.kgID
21 | # join hg19.keggMapDesc kmd on kp.mapId=kmd.mapId
22 | # into outfile '/tmp/keggPathway.txt'
23 | # fields terminated by '\t' lines terminated by '\n'
24 | #
25 |
26 | [linked fields]
27 | *=ccdsId
28 |
29 | [data sources]
30 | description=kegg pathway for CCDS genes
31 | version=20110823
32 | anno_type=field
33 | direct_url=annoDB/keggPathway-20110823.DB.gz c97d10fa656535c710280f46b37c95a1
34 | source_url=annoDB/keggPathway-20110823.txt.gz
35 | source_type=txt
36 |
37 | [ccdsId]
38 | index=1
39 | type=VARCHAR(24) NULL
40 | comment=CCDS gene ID
41 |
42 | [KgID]
43 | index=2
44 | type=VARCHAR(24) NULL
45 | comment=Kegg pathway ID
46 |
47 | [KgDesc]
48 | index=3
49 | type=VARCHAR(128) NULL
50 | comment=Description of pathway
51 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene-hg18_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg18=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=UCSC Known Genes
13 | version=hg18_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene-hg18_20110909.DB.gz
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 |
23 | [strand]
24 | index=3
25 | type=CHAR(1) NULL
26 | comment=which DNA strand contains the observed alleles
27 |
28 | [txStart]
29 | index=4
30 | type=INTEGER
31 | adj=IncreaseBy(1)
32 | comment=Transcription start position
33 |
34 | [txEnd]
35 | index=5
36 | type=INTEGER
37 | comment=Transcription end position
38 |
39 | [cdsStart]
40 | index=6
41 | type=INTEGER
42 | adj=IncreaseBy(1)
43 | comment=Coding region start
44 |
45 | [cdsEnd]
46 | index=7
47 | type=INTEGER
48 | comment=Coding region end
49 |
50 | [exonCount]
51 | index=8
52 | type=INTEGER NULL
53 | comment=Number of exons
54 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene-hg18_20121219.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg18=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=UCSC Known Genes
13 | version=hg18_20121219
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene-hg18_20121219.DB.gz
16 | source_type=txt
17 |
18 | [name]
19 | index=1
20 | type=VARCHAR(48)
21 | comment=Name of gene such as uc001aaa.3
22 |
23 | [chr]
24 | index=2
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=3
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=4
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=5
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=6
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=7
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=8
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene-hg19_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=UCSC Known Genes
13 | version=hg19_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene-hg19_20110909.DB.gz c1f4cef575aad2c07d3fdff648ab2bda
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 |
23 | [strand]
24 | index=3
25 | type=CHAR(1) NULL
26 | comment=which DNA strand contains the observed alleles
27 |
28 | [txStart]
29 | index=4
30 | type=INTEGER
31 | adj=IncreaseBy(1)
32 | comment=Transcription start position
33 |
34 | [txEnd]
35 | index=5
36 | type=INTEGER
37 | comment=Transcription end position
38 |
39 | [cdsStart]
40 | index=6
41 | type=INTEGER
42 | adj=IncreaseBy(1)
43 | comment=Coding region start
44 |
45 | [cdsEnd]
46 | index=7
47 | type=INTEGER
48 | comment=Coding region end
49 |
50 | [exonCount]
51 | index=8
52 | type=INTEGER NULL
53 | comment=Number of exons
54 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene-hg19_20121219.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=UCSC Known Genes
13 | version=hg19_20121219
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene-hg19_20121219.DB.gz c07367da2392c5e0277e232c89f49c68
16 | source_type=txt
17 |
18 | [name]
19 | index=1
20 | type=VARCHAR(48)
21 | comment=Name of gene such as uc001aaa.3
22 |
23 | [chr]
24 | index=2
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=3
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=4
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=5
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=6
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=7
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=8
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene-hg19_20130904.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=Gene predictions based on data from RefSeq, Genbank, CCDS and UniProt, from the UCSC KnownGene track.
13 | version=hg19_20130904
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene-hg19_20130904.DB.gz 0dfff9b8e479bbae7f0d8cb4fb406a29
16 | source_type=txt
17 |
18 | [name]
19 | index=1
20 | type=VARCHAR(48)
21 | comment=Name of gene such as uc001aaa.3
22 |
23 | [chr]
24 | index=2
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=3
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=4
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=5
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=6
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=7
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=8
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene-hg38_20160328.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg38=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=Gene predictions based on data from RefSeq, Genbank, CCDS and UniProt, from the UCSC KnownGene track.
13 | version=hg38_20160328
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene-hg38_20160328.DB.gz 6f1d0d4b00139626f34198cef68eb84f
16 | source_type=txt
17 |
18 | [name]
19 | index=1
20 | type=VARCHAR(48)
21 | comment=Name of gene such as uc001aaa.3
22 |
23 | [chr]
24 | index=2
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=3
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=4
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=5
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=6
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=7
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=8
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
60 | [exonStarts]
61 | index=9
62 | type=VARCHAR(255)
63 | comment=Exon start positions (or end positions for minus strand item)
64 |
65 | [exonEnds]
66 | index=10
67 | type=VARCHAR(255)
68 | comment=Exon end positions (or start positions for minus strand item)
69 |
70 | [proteinID]
71 | index=11
72 | type=VARCHAR(40)
73 | comment=UniProt display ID, UniProt accession, or RefSeq protein ID
74 |
75 | [alignID]
76 | index=12
77 | type=VARCHAR(255)
78 | comment=Unique identifier (GENCODE transcript ID for GENCODE Basic)
79 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene_exon-hg18_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg18=chr, exon_start, exon_end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=Exon locations of UCSC Known Genes
13 | version=hg18_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene_exon-hg18_20110909.DB.gz
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 |
23 | [strand]
24 | index=3
25 | type=CHAR(1) NULL
26 | comment=which DNA strand contains the observed alleles
27 |
28 | [txStart]
29 | index=4
30 | type=INTEGER
31 | adj=IncreaseBy(1)
32 | comment=Transcription start position
33 |
34 | [txEnd]
35 | index=5
36 | type=INTEGER
37 | comment=Transcription end position
38 |
39 | [cdsStart]
40 | index=6
41 | type=INTEGER
42 | adj=IncreaseBy(1)
43 | comment=Coding region start
44 |
45 | [cdsEnd]
46 | index=7
47 | type=INTEGER
48 | comment=Coding region end
49 |
50 | [exonCount]
51 | index=8
52 | type=INTEGER NULL
53 | comment=Number of exons
54 |
55 | [exon_start]
56 | index=9
57 | adj=SplitField(','), IncreaseBy(1)
58 | type=INTEGER NOT NULL
59 | comment=exon start position
60 |
61 | [exon_end]
62 | index=10
63 | adj=SplitField(',')
64 | type=INTEGER NOT NULL
65 | comment=exon end position
66 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene_exon-hg19_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, exon_start, exon_end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=UCSC Known Genes
13 | version=hg19_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
15 | direct_url=annoDB/knownGene_exon-hg19_20110909.DB.gz d24b555a54746f715fa6bf83d7d2643f
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 |
23 | [strand]
24 | index=3
25 | type=CHAR(1) NULL
26 | comment=which DNA strand contains the observed alleles
27 |
28 | [txStart]
29 | index=4
30 | type=INTEGER
31 | adj=IncreaseBy(1)
32 | comment=Transcription start position
33 |
34 | [txEnd]
35 | index=5
36 | type=INTEGER
37 | comment=Transcription end position
38 |
39 | [cdsStart]
40 | index=6
41 | type=INTEGER
42 | adj=IncreaseBy(1)
43 | comment=Coding region start
44 |
45 | [cdsEnd]
46 | index=7
47 | type=INTEGER
48 | comment=Coding region end
49 |
50 | [exonCount]
51 | index=8
52 | type=INTEGER NULL
53 | comment=Number of exons
54 |
55 | [exon_start]
56 | index=9
57 | adj=SplitField(','), IncreaseBy(1)
58 | type=INTEGER NOT NULL
59 | comment=exon start position
60 |
61 | [exon_end]
62 | index=10
63 | adj=SplitField(',')
64 | type=INTEGER NOT NULL
65 | comment=exon end position
66 |
--------------------------------------------------------------------------------
/resources/annotation/knownGene_exon-hg19_20130904.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, exon_start, exon_end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=Gene predictions based on data from RefSeq, Genbank, CCDS and UniProt, from the UCSC KnownGene track. This
13 | database contains all exon regions of the UCSC known gene database.
14 | version=hg19_20130904
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
16 | direct_url=annoDB/knownGene_exon-hg19_20130904.DB.gz b944475f2b889f6d545d39682ec5b066
17 | source_type=txt
18 |
19 | [chr]
20 | index=2
21 | adj=RemoveLeading('chr')
22 | type=VARCHAR(20)
23 |
24 | [strand]
25 | index=3
26 | type=CHAR(1) NULL
27 | comment=which DNA strand contains the observed alleles
28 |
29 | [txStart]
30 | index=4
31 | type=INTEGER
32 | adj=IncreaseBy(1)
33 | comment=Transcription start position
34 |
35 | [txEnd]
36 | index=5
37 | type=INTEGER
38 | comment=Transcription end position
39 |
40 | [cdsStart]
41 | index=6
42 | type=INTEGER
43 | adj=IncreaseBy(1)
44 | comment=Coding region start
45 |
46 | [cdsEnd]
47 | index=7
48 | type=INTEGER
49 | comment=Coding region end
50 |
51 | [exonCount]
52 | index=8
53 | type=INTEGER NULL
54 | comment=Number of exons
55 |
56 | [exon_start]
57 | index=9
58 | adj=SplitField(','), IncreaseBy(1)
59 | type=INTEGER NOT NULL
60 | comment=exon start position
61 |
62 | [exon_end]
63 | index=10
64 | adj=SplitField(',')
65 | type=INTEGER NOT NULL
66 | comment=exon end position
67 |
--------------------------------------------------------------------------------
/resources/annotation/phastCons-hg19_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 | #
7 | [linked fields]
8 | hg19=chr, start, end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=PhastCons Conservation Scores
13 | version=hg19_20110909
14 | direct_url=annoDB/phastCons-hg19_20110909.DB.gz 5e9c3b8434330a7bc8230bfa5bc10812
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/phastCons46way.txt.gz
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 |
23 | [start]
24 | index=3
25 | type=INTEGER
26 | adj=IncreaseBy(1)
27 | comment=Start position in chromosome
28 |
29 | [end]
30 | index=4
31 | type=INTEGER
32 | comment=End position in chromosome
33 |
34 | [name]
35 | index=5
36 | type=VARCHAR(225)
37 | comment=Name of conserved region
38 |
39 | [count]
40 | index=7
41 | type=INTEGER
42 | comment=Number of values in this block
43 |
44 | [valid_count]
45 | index=12
46 | type=INTEGER
47 | comment=Number of valid values in this block
48 |
49 | [lower_limit]
50 | index=10
51 | type=DOUBLE
52 | comment=Lowest value in this block
53 |
54 | [data_range]
55 | index=11
56 | type=DOUBLE
57 | comment=Spread of values in this block. lower_limit + data_range = upper_limit
58 |
59 | [sum_data]
60 | index=13
61 | type=DOUBLE
62 | comment=Sum of values in this block (can be used to calculate the average and stddev of conservation scores)
63 |
64 | [sum_squares]
65 | index=14
66 | type=DOUBLE
67 | comment=Sum of values squared in this block (can be used for calculating stddev of conservation scores)
68 |
--------------------------------------------------------------------------------
/resources/annotation/phastCons-hg19_20130322.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see http://www.gnu.org/licenses/
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 | #
7 | [linked fields]
8 | hg19=chr, start, end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=PhastCons Conservation Scores
13 | version=hg19_20130322
14 | direct_url=annoDB/phastCons-hg19_20130322.DB.gz 0309a4eb2fdd291f977fe45434879a85
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/phastCons46way.txt.gz
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 |
23 | [start]
24 | index=3
25 | type=INTEGER
26 | adj=IncreaseBy(1)
27 | comment=Start position in chromosome
28 |
29 | [end]
30 | index=4
31 | type=INTEGER
32 | comment=End position in chromosome
33 |
34 | [name]
35 | index=5
36 | type=VARCHAR(225)
37 | comment=Name of conserved region
38 |
39 | [total_count]
40 | index=7
41 | type=INTEGER
42 | comment=Number of values in this block
43 |
44 | [valid_count]
45 | index=12
46 | type=INTEGER
47 | comment=Number of valid values in this block
48 |
49 | [lower_limit]
50 | index=10
51 | type=DOUBLE
52 | comment=Lowest value in this block
53 |
54 | [data_range]
55 | index=11
56 | type=DOUBLE
57 | comment=Spread of values in this block. lower_limit + data_range = upper_limit
58 |
59 | [sum_data]
60 | index=13
61 | type=DOUBLE
62 | comment=Sum of values in this block (can be used for calculating the average and stddev of conservation scores)
63 |
64 | [sum_squares]
65 | index=14
66 | type=DOUBLE
67 | comment=Sum of values squared in this block (can be used for calculating stddev of conservation scores)
68 |
--------------------------------------------------------------------------------
/resources/annotation/phastCons-hg38_20150913.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 | #
7 | [linked fields]
8 | hg38=chr, chr_start, chr_end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=PhastCons Conservation Scores
13 | version=hg38_20150913
14 | direct_url=annoDB/phastCons-hg38_20150913.DB.gz f1a21eefa6b0a330a43ea5dac2cf70d6
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/phastCons100way.txt.gz
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 | comment=Reference sequence chromosome or scaffold
23 |
24 | [chr_start]
25 | index=3
26 | type=INTEGER
27 | adj=IncreaseBy(1)
28 | comment=Start position in chromosome
29 |
30 | [chr_end]
31 | index=4
32 | type=INTEGER
33 | comment=End position in chromosome
34 |
35 | [name]
36 | index=5
37 | type=VARCHAR(225)
38 | comment=Name of conserved region
39 |
40 | [total_count]
41 | index=7
42 | type=INTEGER
43 | comment=Number of values in this block
44 |
45 | [valid_count]
46 | index=12
47 | type=INTEGER
48 | comment=Number of valid values in this block
49 |
50 | [lower_limit]
51 | index=10
52 | type=DOUBLE
53 | comment=Lowest value in this block
54 |
55 | [data_range]
56 | index=11
57 | type=DOUBLE
58 | comment=Spread of values in this block. lower_limit + data_range = upper_limit
59 |
60 | [sum_data]
61 | index=13
62 | type=DOUBLE
63 | comment=Sum of values in this block (can be used for calculating the average and stddev of conservation scores)
64 |
65 | [sum_squares]
66 | index=14
67 | type=DOUBLE
68 | comment=Sum of values squared in this block (can be used for calculating stddev of conservation scores)
69 |
70 | [file]
71 | index=9
72 | type=VARCHAR(255)
73 | comment=path name to data file, one byte per value
74 |
--------------------------------------------------------------------------------
/resources/annotation/phastConsElements-hg19_20130622.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 | #
7 | [linked fields]
8 | hg19=chr, start, end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=PhastCons Conservation Scores
13 | version=hg19_20130622
14 | direct_url=annoDB/phastConsElements-hg19_20130622.DB.gz c212ac8711f0e5194777e7dc9a61b447
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/phastConsElements46way.txt.gz
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(20)
22 |
23 | [start]
24 | index=3
25 | type=INTEGER
26 | adj=IncreaseBy(1)
27 | comment=Start position in chromosome
28 |
29 | [end]
30 | index=4
31 | type=INTEGER
32 | comment=End position in chromosome
33 |
34 | [name]
35 | index=5
36 | type=VARCHAR(225)
37 | comment=Name of conserved region
38 |
39 | [score]
40 | index=6
41 | type=INTEGER
42 | comment=Phast cons score from 0 to 1000
43 |
--------------------------------------------------------------------------------
/resources/annotation/phastConsElements-hg38_20150913.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Man Chong Leong (henryleong@rice.edu)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 | #
7 | [linked fields]
8 | hg38=chr, start, end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=PhastCons Conservation Scores
13 | version=hg38_20150913
14 | direct_url=annoDB/phastConsElements-hg38_20150913.DB.gz 5b9649e1ae18825622090a4e1f5e07db
15 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/phastConsElements100way.txt.gz
16 | source_type=txt
17 |
18 | [chr]
19 | index=2
20 | adj=RemoveLeading('chr')
21 | type=VARCHAR(255)
22 | comment=Reference sequence chromosome or scaffold
23 |
24 | [start]
25 | index=3
26 | type=INTEGER
27 | adj=IncreaseBy(1)
28 | comment=Start position in chromosome
29 |
30 | [end]
31 | index=4
32 | type=INTEGER
33 | comment=End position in chromosome
34 |
35 | [name]
36 | index=5
37 | type=VARCHAR(225)
38 | comment=Name of conserved region
39 |
40 | [score]
41 | index=6
42 | type=INTEGER
43 | comment=Phast cons score from 0 to 1000
44 |
--------------------------------------------------------------------------------
/resources/annotation/refGene-hg18_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg18=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=refseq Genes
13 | version=hg18_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz
15 | direct_url=annoDB/refGene-hg18_20110909.DB.gz
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=5
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=6
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=7
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=8
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=9
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
60 | [score]
61 | index=12
62 | type=INTEGER NULL
63 | comment=Score
64 |
65 | [name2]
66 | index=13
67 | type=VARCHAR(255)
68 | comment=Alternative name
69 |
70 | [cdsStartStat]
71 | index=14
72 | type=VARCHAR(10)
73 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
74 |
75 | [cdsEndStat]
76 | index=15
77 | type=VARCHAR(10)
78 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
79 |
--------------------------------------------------------------------------------
/resources/annotation/refGene-hg19_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, txStart, txEnd
9 |
10 | [data sources]
11 | anno_type=range
12 | description=refseq Genes
13 | version=hg19_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz
15 | direct_url=annoDB/refGene-hg19_20110909.DB.gz 03443479f56812cea4b2d42aebbc1151
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=5
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=6
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=7
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=8
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=9
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
60 | [score]
61 | index=12
62 | type=INTEGER NULL
63 | comment=Score
64 |
65 | [name2]
66 | index=13
67 | type=VARCHAR(255)
68 | comment=Alternative name
69 |
70 | [cdsStartStat]
71 | index=14
72 | type=VARCHAR(10)
73 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
74 |
75 | [cdsEndStat]
76 | index=15
77 | type=VARCHAR(10)
78 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
79 |
--------------------------------------------------------------------------------
/resources/annotation/refGene_exon-hg18_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg18=chr, exon_start, exon_end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=Exon locations of refseq Genes
13 | version=hg18_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz
15 | direct_url=annoDB/refGene_exon-hg18_20110909.DB.gz
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=5
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=6
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=7
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=8
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=9
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
60 | [exon_start]
61 | index=10
62 | adj=SplitField(','), IncreaseBy(1)
63 | type=INTEGER NOT NULL
64 | comment=exon start position
65 |
66 | [exon_end]
67 | index=11
68 | adj=SplitField(',')
69 | type=INTEGER NOT NULL
70 | comment=exon end position
71 |
72 | [score]
73 | index=12
74 | type=INTEGER NULL
75 | comment=Score
76 |
77 | [name2]
78 | index=13
79 | type=VARCHAR(255)
80 | comment=Alternative name
81 |
82 | [cdsStartStat]
83 | index=14
84 | type=VARCHAR(10)
85 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
86 |
87 | [cdsEndStat]
88 | index=15
89 | type=VARCHAR(10)
90 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
91 |
--------------------------------------------------------------------------------
/resources/annotation/refGene_exon-hg19_20110909.ann:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Annotation/New
5 | # for a description of the format of this file.
6 |
7 | [linked fields]
8 | hg19=chr, exon_start, exon_end
9 |
10 | [data sources]
11 | anno_type=range
12 | description=Exon locations of refseq Genes
13 | version=hg19_20110909
14 | source_url=ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz
15 | direct_url=annoDB/refGene_exon-hg19_20110909.DB.gz 0cde491895e87928560511b9d7ef91d1
16 | source_type=txt
17 |
18 | [name]
19 | index=2
20 | type=VARCHAR(255)
21 | comment=Gene name
22 |
23 | [chr]
24 | index=3
25 | adj=RemoveLeading('chr')
26 | type=VARCHAR(20)
27 |
28 | [strand]
29 | index=4
30 | type=CHAR(1) NULL
31 | comment=which DNA strand contains the observed alleles
32 |
33 | [txStart]
34 | index=5
35 | type=INTEGER
36 | adj=IncreaseBy(1)
37 | comment=Transcription start position
38 |
39 | [txEnd]
40 | index=6
41 | type=INTEGER
42 | comment=Transcription end position
43 |
44 | [cdsStart]
45 | index=7
46 | type=INTEGER
47 | adj=IncreaseBy(1)
48 | comment=Coding region start
49 |
50 | [cdsEnd]
51 | index=8
52 | type=INTEGER
53 | comment=Coding region end
54 |
55 | [exonCount]
56 | index=9
57 | type=INTEGER NULL
58 | comment=Number of exons
59 |
60 | [exon_start]
61 | index=10
62 | adj=SplitField(','), IncreaseBy(1)
63 | type=INTEGER NOT NULL
64 | comment=exon start position
65 |
66 | [exon_end]
67 | index=11
68 | adj=SplitField(',')
69 | type=INTEGER NOT NULL
70 | comment=exon end position
71 |
72 | [score]
73 | index=12
74 | type=INTEGER NULL
75 | comment=Score
76 |
77 | [name2]
78 | index=13
79 | type=VARCHAR(255)
80 | comment=Alternative name
81 |
82 | [cdsStartStat]
83 | index=14
84 | type=VARCHAR(10)
85 | comment=cds start stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
86 |
87 | [cdsEndStat]
88 | index=15
89 | type=VARCHAR(10)
90 | comment=cds end stat, can be 'none', 'unk', 'incmpl', and 'cmpl'
91 |
--------------------------------------------------------------------------------
/resources/format/ANNOVAR.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 | #
7 |
8 | [format description]
9 | description=Input format of ANNOVAR. No genotype is defined.
10 | variant=chr,pos,ref,alt
11 | delimiter=None
12 | export_by=chr,pos,ref,alt
13 |
14 | [DEFAULT]
15 | # one or more fields that will be outputted in the comment field, if specified.
16 | comment_string=
17 | comment_string_comment=Output one or more fields to the optional comment column of this format.
18 |
19 | [chr]
20 | index=1
21 | type=VARCHAR(20)
22 | adj=RemoveLeading('chr')
23 | comment=Chromosome
24 |
25 | [pos]
26 | index=2
27 | type=INTEGER NOT NULL
28 | comment=1-based position
29 |
30 | [ref]
31 | index=4
32 | type=VARCHAR(255)
33 | comment=Reference allele, '-' for insertion.
34 |
35 | [alt]
36 | index=5
37 | type=VARCHAR(255)
38 | comment=Alternative allele, '-' for deletion.
39 |
40 | [col_1]
41 | field=chr
42 | comment=chromosome
43 |
44 | [col_2]
45 | field=pos
46 | comment=position (1-based)
47 |
48 | [col_3]
49 | field=pos,ref
50 | adj=lambda x: str(int(x[0])-1+len(x[1]))
51 | comment=end position
52 |
53 | [col_4]
54 | field=ref
55 | comment=reference allele
56 |
57 | [col_5]
58 | field=alt
59 | comment=alternative allele
60 |
61 | [col_6]
62 | field=%(comment_string)s
63 | adj=JoinFields(',')
64 | comment=optional column
65 |
--------------------------------------------------------------------------------
/resources/format/ANNOVAR_exonic_variant_function.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 | #
7 |
8 | [format description]
9 | description=Output from ANNOVAR for files of type *exonic_variant_function, generated from command "path/to/annovar/annotate_variation.pl annovar.txt path/to/annovar/humandb/". This format imports chr, pos, ref, alt and ANNOVAR annotations. For details please refer to http://www.openbioinformatics.org/annovar/annovar_gene.html
10 | variant=chr, pos, ref, alt
11 | variant_info=%(var_info)s
12 |
13 | [DEFAULT]
14 | var_info=mut_type
15 | var_info_comment=Fields to be outputted, can be one or both of mut_type and function.
16 |
17 | [chr]
18 | index=4
19 | type=VARCHAR(20)
20 | adj=RemoveLeading('chr')
21 | comment=Chromosome
22 |
23 | [pos]
24 | index=5
25 | type=INTEGER NOT NULL
26 | comment=1-based position, hg18
27 |
28 | [ref]
29 | index=7
30 | type=VARCHAR(255)
31 | comment=Reference allele, '-' for insertion.
32 |
33 | [alt]
34 | index=8
35 | type=VARCHAR(255)
36 | comment=Alternative allele, '-' for deletion.
37 |
38 | [mut_type]
39 | index=2
40 | type=VARCHAR(255)
41 | comment=the functional consequences of the variant.
42 |
43 | [genename]
44 | index=3
45 | type=VARCHAR(255)
46 | adj=ExtractField(1, sep=':')
47 | comment=Gene name (for the first exon if the variant is in more than one exon, but usually the names for all exons are the same).
48 |
49 | [function]
50 | index=3
51 | type=VARCHAR(255)
52 | comment=the gene name, the transcript identifier and the sequence change in the corresponding transcript
53 |
--------------------------------------------------------------------------------
/resources/format/ANNOVAR_variant_function.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 | #
7 |
8 | [format description]
9 | description=Output from ANNOVAR for files of type "*.variant_function", generated from command "path/to/annovar/annotate_variation.pl annovar.txt path/to/annovar/humandb/". This format imports chr, pos, ref, alt and ANNOVAR annotations. For details please refer to http://www.openbioinformatics.org/annovar/annovar_gene.html
10 | variant=chr, pos, ref, alt
11 | variant_info=%(var_info)s
12 |
13 | [DEFAULT]
14 | var_info=region_type
15 | var_info_comment=Fields to be outputted, can be one or both of region_type and region_name.
16 |
17 | [chr]
18 | index=3
19 | type=VARCHAR(20)
20 | adj=RemoveLeading('chr')
21 | comment=Chromosome
22 |
23 | [pos]
24 | index=4
25 | type=INTEGER NOT NULL
26 | comment=1-based position, hg18
27 |
28 | [ref]
29 | index=6
30 | type=VARCHAR(255)
31 | comment=Reference allele, '-' for insertion.
32 |
33 | [alt]
34 | index=7
35 | type=VARCHAR(255)
36 | comment=Alternative allele, '-' for deletion.
37 |
38 | [region_type]
39 | index=1
40 | type=VARCHAR(255)
41 | comment=The genomic region type (i.e., intergenic, ncRNA_intronic, etc) where this variant lies.
42 |
43 | [region_name]
44 | index=2
45 | type=VARCHAR(255)
46 | comment=Genomic region name that corresponds to the region_type. If the variant lies in an intergenic region, this field will specify the closest known regions upstream and downstream of this variant.
47 |
--------------------------------------------------------------------------------
/resources/format/csv.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 |
7 | [format description]
8 | description=Import variants (chr, pos, ref, alt) in csv format, or output arbitrary specified fields in csv format
9 | delimiter=','
10 | variant=chr,pos,ref,alt
11 | export_by=chr,pos,ref,alt
12 | sort_output_by=%(order_by)s
13 |
14 | [DEFAULT]
15 | chr_col=1
16 | chr_col_comment=Column index for the chromosome field
17 |
18 | pos_col=2
19 | pos_col_comment=Column index for the position field
20 |
21 | ref_col=3
22 | ref_col_comment=Column index for the reference field
23 |
24 | alt_col=4
25 | alt_col_comment=Column index for the alternative field
26 |
27 | pos_adj=0
28 | pos_adj_comment=Set to 1 if the input position is zero-based.
29 |
30 | fields=chr,pos,ref,alt
31 | fields_comment=Fields to output, simple arithmetic is allowed (e.g. pos+1) but aggregation functions are not supported.
32 |
33 | order_by=
34 | order_by_comment=Fields used to order output in ascending order.
35 |
36 | [field formatter]
37 | fmt_*=CSVFormatter()
38 | fmt_GT=GenoFormatter(style='numeric')
39 |
40 | [col_1]
41 | field=%(fields)s
42 | adj=JoinRecords(',')
43 | comment=Output all fields as one column
44 |
45 | [col_2]
46 | field=GT
47 | comment=genotype
48 |
49 | [chr]
50 | index=%(chr_col)s
51 | type=VARCHAR(20)
52 | adj=RemoveLeading('chr')
53 | comment=Chromosome
54 |
55 | [pos]
56 | index=%(pos_col)s
57 | adj=IncreaseBy(%(pos_adj)s)
58 | type=INTEGER NOT NULL
59 | comment=1-based position
60 |
61 | [ref]
62 | index=%(ref_col)s
63 | type=VARCHAR(255)
64 | comment=Reference allele, '-' for insertion.
65 |
66 | [alt]
67 | index=%(alt_col)s
68 | type=VARCHAR(255)
69 | comment=Alternative allele, '-' for deletion.
70 |
--------------------------------------------------------------------------------
/resources/format/pileup_indel.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 | #
7 |
8 | [format description]
9 | description=Input format for samtools pileup indel caller. This format imports chr, pos, ref, alt and genotype.
10 | variant=chr, pos, ref, alt
11 | genotype=GT
12 |
13 | [chr]
14 | index=1
15 | type=VARCHAR(20)
16 | adj=RemoveLeading('chr')
17 | comment=Chromosome name
18 |
19 | [pos]
20 | index=2
21 | type=INTEGER NOT NULL
22 | comment=Start position of the indel event.
23 |
24 | [type]
25 | index=3
26 | type=VARCHAR(255)
27 | comment=String summarizing the indel type, one of Dn (deletion of length n) and In (insertion of length n)
28 |
29 | [ref]
30 | index=3,4
31 | type=VARCHAR(255)
32 | # send in I4, AAAA or D1, A. If this is an insertion, ref is -, otherwise ref is the genotype
33 | adj=lambda x: '-' if x[0].startswith('I') else x[1]
34 | comment=reference allele, '-' for insertion
35 |
36 | [alt]
37 | index=3,4
38 | type=VARCHAR(255)
39 | # send in I4, AAAA or D1, A. If this is an insertion, alt is genotype, otherwise alt is '-'
40 | adj=lambda x: '-' if x[0].startswith('D') else x[1]
41 | comment=alternative allele, '-' for deletion
42 |
43 | [GT]
44 | index=6
45 | type=INT
46 | adj=MapValue({'homo': '2', 'hete': '1'})
47 | comment=type of indel (homozygote or heterozygote)
48 |
--------------------------------------------------------------------------------
/resources/format/plink.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 |
7 | [format description]
8 | description=Input format for PLINK dataset. Currently only PLINK binary PED file format is supported (*.bed, *.bim & *.fam)
9 | delimiter=','
10 | variant=chr,pos,ref,alt
11 | genotype=GT
12 | # call a preprocessor to convert input PLINK files to a variant-based format
13 | preprocessor=PlinkConverter($build, chrom_namemap = {'23':'X', '24':'Y', '26':'M'})
14 |
15 | [chr]
16 | index=1
17 | type=VARCHAR(20)
18 | comment=Chromosome
19 |
20 | [pos]
21 | index=2
22 | type=INTEGER NOT NULL
23 | comment=1-based Position of the snp
24 |
25 | [ref]
26 | index=3
27 | type=VARCHAR(255)
28 | comment=Reference allele
29 |
30 | [alt]
31 | index=4
32 | type=VARCHAR(255)
33 | comment=Alternative allele
34 |
35 | [GT]
36 | index=5:
37 | type=INTEGER
38 | adj=Nullify(['3', 'E'])
39 | comment=Genotype coded as 0 (ref ref), 1 (ref alt) and 2 (alt alt)
40 |
--------------------------------------------------------------------------------
/resources/format/rsname.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 |
7 | [format description]
8 | description=Import variants (chr, pos, ref, alt) that are queried from dbSNP database using provided rsnames
9 | delimiter=','
10 | variant=chr,pos,ref,alt
11 |
12 | [DEFAULT]
13 | sep=','
14 | sep_comment=delimiter used to separate input fields
15 |
16 | rsname_col=1
17 | rsname_col_comment=Index for the column with rsname
18 |
19 | dbfile=dbSNP-hg19_138.DB
20 | dbfile_comment=Name of an attached dbSNP database or path to the dbSNP database in sqlite format
21 |
22 | [chr]
23 | index=%(rsname_col)s
24 | type=VARCHAR(20)
25 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='chr', cond_fields='name')
26 | comment=Obtain chromosome from dbSNP by rsname
27 |
28 | [pos]
29 | index=%(rsname_col)s
30 | type=VARCHAR(20)
31 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='pos', cond_fields='name')
32 | comment=Obtain position from dbSNP by rsname
33 |
34 | [ref]
35 | index=%(rsname_col)s
36 | type=VARCHAR(20)
37 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='ref', cond_fields='name')
38 | comment=Obtain reference allele from dbSNP by rsname
39 |
40 | [alt]
41 | index=%(rsname_col)s
42 | type=VARCHAR(20)
43 | adj=FieldFromDB(dbfile="%(dbfile)s", res_field='alt', cond_fields='name')
44 | comment=Obtain alternative allele from dbSNP by rsname
45 |
--------------------------------------------------------------------------------
/resources/format/twoalleles.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. see
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 |
7 | [format description]
8 | description=Import variants (chr, pos, ref, alt) from chr, pos, allele1, and allele2, using a reference genome to determine which one is reference
9 | delimiter=%(sep)s
10 | variant=chr,pos,ref,alt
11 |
12 | [DEFAULT]
13 | sep=','
14 | sep_comment=delimiter used to separate input fields
15 |
16 | ref_genome=hg19
17 | ref_genome_comment=Reference genome of the input data
18 |
19 | chr_col=1
20 | chr_col_comment=Column index for the chromosome field
21 |
22 | pos_col=2
23 | pos_col_comment=Column index for the position field
24 |
25 | a1_col=3
26 | a1_col_comment=Column index for the first allele
27 |
28 | a2_col=4
29 | a2_col_comment=Column index for the second allele
30 |
31 | [chr]
32 | index=%(chr_col)s
33 | type=VARCHAR(20)
34 | adj=RemoveLeading('chr')
35 | comment=Chromosome
36 |
37 | [pos]
38 | index=%(pos_col)s
39 | type=INTEGER NOT NULL
40 | adj=lambda x: x.split(':')[1]
41 | comment=1-based position
42 |
43 | [ref]
44 | index=%(chr_col)s, %(pos_col)s
45 | type=VARCHAR(20)
46 | adj=RefAtPos(build="%(ref_genome)s")
47 | comment=Obtain reference allele from reference genome
48 |
49 | [alt]
50 | index=%(chr_col)s, %(pos_col)s, %(a1_col)s, %(a2_col)s
51 | type=VARCHAR(20)
52 | adj=AltAtPos(build="%(ref_genome)s")
53 | comment=Obtain alternative allele using reference genome
54 |
--------------------------------------------------------------------------------
/src/cgatools/reference/ChromosomeIdField.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Complete Genomics, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License"); you
4 | // may not use this file except in compliance with the License. You
5 | // may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 | // implied. See the License for the specific language governing
13 | // permissions and limitations under the License.
14 |
15 | #include "cgatools/core.hpp"
16 | #include "cgatools/reference/ChromosomeIdField.hpp"
17 | #include "cgatools/reference/CrrFile.hpp"
18 |
19 | namespace cgatools { namespace reference {
20 |
21 | void ChromosomeIdField::parse(const char* first, const char* last) // Parse the field text in [first, last) into a chromosome id.
22 | {
23 | buf_.assign(first, last); // copy the character range into the member string buffer
24 | *id_ = ref_.getChromosomeId(buf_); // look the text up via the reference and store the result through id_
25 | }
26 |
27 | } } // cgatools::reference
28 |
--------------------------------------------------------------------------------
/src/cgatools/reference/ChromosomeIdField.hpp:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Complete Genomics, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License"); you
4 | // may not use this file except in compliance with the License. You
5 | // may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 | // implied. See the License for the specific language governing
13 | // permissions and limitations under the License.
14 |
15 | #ifndef CGATOOLS_REFERENCE_CHROMOSOMEIDFIELD_HPP_
16 | #define CGATOOLS_REFERENCE_CHROMOSOMEIDFIELD_HPP_ 1
17 |
18 | //! @file ChromosomeIdField.hpp
19 |
20 | #include "cgatools/core.hpp"
21 | #include "cgatools/util/DelimitedLineParser.hpp"
22 |
23 | namespace cgatools { namespace reference {
24 |
25 | class CrrFile;
26 |
27 | class ChromosomeIdField : public util::DelimitedFieldParser // Field parser that writes a chromosome id, looked up in a CrrFile, through a caller-supplied pointer.
28 | {
29 | public:
30 | ChromosomeIdField(const std::string& name, uint16_t* id, const CrrFile& ref) // name is forwarded to the base parser; id and ref are stored as pointer/reference, not copied
31 | : DelimitedFieldParser(name),
32 | ref_(ref),
33 | id_(id)
34 | {
35 | }
36 |
37 | void parse(const char* first, const char* last); // parses the text in [first, last); defined in ChromosomeIdField.cpp
38 |
39 | private:
40 | const CrrFile& ref_; // reference file used for the id lookup (held by reference)
41 | uint16_t* id_; // destination that receives the parsed chromosome id
42 | std::string buf_; // string buffer holding the field text passed to the lookup
43 | };
44 |
45 | } } // cgatools::reference
46 |
47 | #endif // CGATOOLS_REFERENCE_CHROMOSOMEIDFIELD_HPP_
48 |
--------------------------------------------------------------------------------
/src/cgatools/reference/RepeatMaskerStore.hpp:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Complete Genomics, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License"); you
4 | // may not use this file except in compliance with the License. You
5 | // may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 | // implied. See the License for the specific language governing
13 | // permissions and limitations under the License.
14 |
15 | #ifndef CGATOOLS_REFERENCE_REPEATMASKERSTORE_HPP_
16 | #define CGATOOLS_REFERENCE_REPEATMASKERSTORE_HPP_ 1
17 |
18 | //! @file RepeatMaskerStore.hpp
19 |
20 | #include "cgatools/core.hpp"
21 | #include "cgatools/reference/RangeAnnotationStore.hpp"
22 |
23 | namespace cgatools { namespace reference {
24 |
25 | struct RepeatMaskerAnnotation // Per-record data filled in by RepeatMaskerStore::bindColumns.
26 | {
27 | std::string name_, family_; // bound to the "repName" and "repFamily" columns
28 | double divergence_; // bound to the "divergence" column
29 | bool strand_; // bound to the "strand" column through StrandField
30 | };
31 |
32 | class RepeatMaskerStore : // Range annotation store whose records carry a RepeatMaskerAnnotation.
33 | public RangeAnnotationStore
34 | {
35 | public:
36 |
37 | RepeatMaskerStore(const reference::CrrFile& crr, const std::string& fn) // builds the store for reference crr and immediately loads fn
38 | : Base(crr) // NOTE(review): Base is not declared in this header; presumably provided by RangeAnnotationStore - confirm
39 | {
40 | load(fn); // load() is not defined in this class; it receives the file name fn
41 | }
42 |
43 | void bindColumns(util::DelimitedFile& df, // registers the fields to read from df: range columns first, then four annotation columns
44 | reference::Range& range,
45 | RepeatMaskerAnnotation& data)
46 | {
47 | using namespace util;
48 | bindRangeColumns(df, range); // helper not defined in this class; binds the range-related columns to range
49 | df.addField(StringField("repName", &data.name_));
50 | df.addField(StringField("repFamily", &data.family_));
51 | df.addField(ValueField("divergence", &data.divergence_));
52 | df.addField(StrandField("strand", &data.strand_));
53 | }
54 | };
55 |
56 | }}
57 |
58 | #endif
59 |
--------------------------------------------------------------------------------
/src/cgatools/util/Exception.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Complete Genomics, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License"); you
4 | // may not use this file except in compliance with the License. You
5 | // may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 | // implied. See the License for the specific language governing
13 | // permissions and limitations under the License.
14 |
15 | #include "cgatools/core.hpp"
16 | #include "cgatools/util/Exception.hpp"
17 |
18 | namespace cgatools { namespace util {
19 |
20 | Exception::Exception(const std::string& message) // Initializes message_ from message.
21 | : message_(message)
22 | {
23 | }
24 | // Returns message_.c_str(); the pointer refers to storage owned by this object. Declared non-throwing.
25 | const char* Exception::what() const throw ()
26 | {
27 | return message_.c_str();
28 | }
29 |
30 | } } // cgatools::util
31 |
--------------------------------------------------------------------------------
/src/cgatools/util/GenericHistogram.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Complete Genomics, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License"); you
4 | // may not use this file except in compliance with the License. You
5 | // may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 | // implied. See the License for the specific language governing
13 | // permissions and limitations under the License.
14 |
15 | #include "cgatools/core.hpp"
16 | #include "GenericHistogram.hpp"
17 | #include
18 |
19 | namespace cgatools { namespace util {
20 |
21 | std::ostream& operator<<(std::ostream& out, const SimpleHistogram& src) { // Streams src by delegating to SimpleHistogram::write.
22 | src.write(out);
23 | return out; // Returns the same stream so insertions can be chained.
24 | }
25 | void SimpleHistogram::write( std::ostream& out ) const // Writes the "#sum" summary line, a blank line, then the ">bucket,count" table to out.
26 | {
27 |     out << "#sum," << sum_ << ",overall," << number_ << std::endl;
28 |     out << std::endl;
29 |     out << ">bucket,count" << std::endl;
30 |     for (size_t ii = 0; ii + 1 < count_.size(); ++ii) { // "ii + 1 <" avoids the unsigned wrap of size()-1 when count_ is empty.
31 |         out << ii << ',' << count_[ii] << std::endl;
32 |     }
33 |     if (!count_.empty()) out << "over," << count_.back() << std::endl; // Last slot is printed as the "over" row; back() on an empty container is undefined.
34 | }
35 | } }
36 |
--------------------------------------------------------------------------------
/src/cgatools/util/IndirectComparator.hpp:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Complete Genomics, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License"); you
4 | // may not use this file except in compliance with the License. You
5 | // may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 | // implied. See the License for the specific language governing
13 | // permissions and limitations under the License.
14 |
15 | #ifndef CGATOOLS_UTIL_INDIRECTCOMPARATOR_HPP_
16 | #define CGATOOLS_UTIL_INDIRECTCOMPARATOR_HPP_ 1
17 |
18 | //! @file IndirectComparator.hpp
19 |
20 | #include "cgatools/core.hpp"
21 |
22 | namespace cgatools { namespace util {
23 |
24 | template <class Container> // Parameter list restored: a bare "template" keyword does not compile.
25 | class IndirectComparator // Compares two indices by the container elements they select.
26 | {
27 | public:
28 |     IndirectComparator(const Container& cc) // Keeps a reference to cc, so cc must outlive the comparator.
29 |         : cc_(cc)
30 |     {
31 |     }
32 |     // Returns true when the element at lhs is less than the element at rhs.
33 |     template <class Index>
34 |     bool operator()(const Index& lhs, const Index& rhs) const
35 |     {
36 |         return cc_[lhs] < cc_[rhs];
37 |     }
38 |
39 | private:
40 |     const Container& cc_;
41 | };
42 |
43 | } } // cgatools::util
44 |
45 | #endif // CGATOOLS_UTIL_INDIRECTCOMPARATOR_HPP_
46 |
--------------------------------------------------------------------------------
/src/hdf5-blosc/blosc_filter.h:
--------------------------------------------------------------------------------
1 | #ifndef FILTER_BLOSC_H
2 | #define FILTER_BLOSC_H
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | #include "blosc.h"
9 |
10 | /* Filter revision number, starting at 1 */
11 | /* #define FILTER_BLOSC_VERSION 1 */
12 | #define FILTER_BLOSC_VERSION 2 /* multiple compressors since Blosc 1.3 */
13 |
14 | /* Filter ID registered with the HDF Group */
15 | #define FILTER_BLOSC 32001
16 |
17 | /* Registers the filter with the HDF5 library. */
18 | #if defined(_MSC_VER)
19 | __declspec(dllexport)
20 | #endif /* defined(_MSC_VER) */
21 | int register_blosc(char **version, char **date);
22 |
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/src/hdf5-blosc/blosc_plugin.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Dynamically loaded filter plugin for HDF5 blosc filter.
3 | *
4 | * Author: Kiyoshi Masui
5 | * Created: 2014
6 | *
7 | * For compiling, use:
8 | * $ h5cc -fPIC -shared blosc_plugin.c blosc_filter.c -o libH5Zblosc.so -lblosc
9 | *
10 | */
11 |
12 |
13 | #include
14 |
15 |
16 | #define H5Z_class_t_vers 2
17 |
18 | #include "blosc_plugin.h"
19 | #include "blosc_filter.h"
20 |
21 |
22 | /* Prototypes for filter function in blosc_filter.c. */
23 | size_t blosc_filter(unsigned flags, size_t cd_nelmts,
24 | const unsigned cd_values[], size_t nbytes,
25 | size_t* buf_size, void** buf);
26 |
27 | herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space);
28 |
29 |
30 | H5Z_class_t blosc_H5Filter[1] = { /* Single-entry filter class table; H5PLget_plugin_info() returns it. */
31 | {
32 | H5Z_CLASS_T_VERS, /* Filter class struct version constant */
33 | (H5Z_filter_t)(FILTER_BLOSC), /* Filter ID, defined in blosc_filter.h */
34 | 1, /* encoder_present flag (set to true) */
35 | 1, /* decoder_present flag (set to true) */
36 | "blosc",
37 | /* Filter info */
38 | NULL, /* The "can apply" callback */
39 | (H5Z_set_local_func_t)(blosc_set_local), /* The "set local" callback */
40 | (H5Z_func_t)(blosc_filter), /* The filter function */
41 | }
42 | };
43 |
44 |
45 | H5PL_type_t H5PLget_plugin_type(void) { return H5PL_TYPE_FILTER; } /* Reports this plugin as a filter plugin. */
46 |
47 | /* Returns the blosc filter class table defined above. */
48 | const void* H5PLget_plugin_info(void) { return blosc_H5Filter; }
49 |
--------------------------------------------------------------------------------
/src/hdf5-blosc/blosc_plugin.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Dynamically loaded filter plugin for HDF5 blosc filter.
3 | *
4 | * Author: Kiyoshi Masui
5 | * Created: 2014
6 | *
7 | *
8 | * Header file
9 | * -----------
10 | *
11 | * This provides dynamically loaded HDF5 filter functionality (introduced
12 | * in HDF5-1.8.11, May 2013) to the blosc HDF5 filter.
13 | *
14 | * Usage: compile as a shared library and install either to the default
15 | * search location for HDF5 filter plugins (on Linux
16 | * /usr/local/hdf5/lib/plugin) or to a location pointed to by the
17 | * HDF5_PLUGIN_PATH environment variable.
18 | *
19 | */
20 |
21 |
22 | #ifndef PLUGIN_BLOSC_H
23 | #define PLUGIN_BLOSC_H
24 |
25 | #include "H5PLextern.h"
26 |
27 |
28 | H5PL_type_t H5PLget_plugin_type(void);
29 |
30 |
31 | const void* H5PLget_plugin_info(void);
32 |
33 |
34 | #endif // PLUGIN_BLOSC_H
35 |
36 |
37 |
--------------------------------------------------------------------------------
/src/libplinkio/LICENSE:
--------------------------------------------------------------------------------
1 | /* =====================================================================================
2 | //
3 | // This is a small C and Python library for reading Plink genotype files,
4 | // written by Mattias Franberg, version 0.2.2
5 | //
6 | // https://bitbucket.org/mattias_franberg/libplinkio
7 | //
8 | // This software is not licensed or copyrighted. The varianttools developers
9 | // have contacted its author and will include the license information when we
10 | // hear from the author, or replace it with an alternative implementation if the
11 | // author requests its removal.
12 | //
13 | ===================================================================================== */
14 |
--------------------------------------------------------------------------------
/src/libplinkio/bim_parse.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2012-2013, Mattias Frånberg
3 | * All rights reserved.
4 | *
5 | * This file is distributed under the Modified BSD License. See the COPYING file
6 | * for details.
7 | */
8 |
9 | #ifndef __BIM_PARSE_H__
10 | #define __BIM_PARSE_H__
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | #include
17 |
18 | /**
19 | * Parses the loci and points the given locus array to
20 | * the memory that contains them, and writes back the number
21 | * of loci.
22 | *
23 | * @param bim_fp Bim file.
24 | * @param locus The parsed loci will be stored here.
25 | *
26 | * @return PIO_OK if the loci could be parsed, PIO_ERROR otherwise.
27 | */
28 | pio_status_t parse_loci(FILE *bim_fp, UT_array *locus);
29 |
30 | #ifdef __cplusplus
31 | }
32 | #endif
33 |
34 | #endif /* End of __BIM_PARSE_H__ */
35 |
--------------------------------------------------------------------------------
/src/libplinkio/common.h:
--------------------------------------------------------------------------------
1 | #ifndef _COMMON_H_
2 | #define _COMMON_H_
3 |
4 | #include
5 |
6 | /**
7 | * Common integer conversion for python 3 and 2.x.
8 | */
9 | #if PY_MAJOR_VERSION < 3
10 | #define PyLong_FromLong(x) ( (PyObject *) PyInt_FromLong( (long) ( x ) ) )
11 | #define PyLong_AsLong(x) ( (long) PyInt_AsLong( ( x ) ) )
12 | #endif
13 |
14 | #endif
15 |
--------------------------------------------------------------------------------
/src/libplinkio/fam_parse.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2012-2013, Mattias Frånberg
3 | * All rights reserved.
4 | *
5 | * This file is distributed under the Modified BSD License. See the COPYING file
6 | * for details.
7 | */
8 |
9 | #ifndef __FAM_PARSE_H__
10 | #define __FAM_PARSE_H__
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | #include
17 |
18 | #include
19 | #include
20 |
21 | /**
22 | * Parses the samples and points the given sample array to
23 | * the memory that contains them, and writes back the number
24 | * of samples.
25 | *
26 | * @param fam_fp Fam file.
27 | * @param sample Parsed samples will be stored here.
28 | *
29 | * @return PIO_OK if the samples could be parsed, PIO_ERROR otherwise.
30 | */
31 | pio_status_t parse_samples(FILE *fam_fp, UT_array *sample);
32 |
33 | #ifdef __cplusplus
34 | }
35 | #endif
36 |
37 | #endif /* End of __FAM_PARSE_H__ */
38 |
--------------------------------------------------------------------------------
/src/libplinkio/file.c:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2012-2013, Mattias Frånberg
3 | * All rights reserved.
4 | *
5 | * This file is distributed under the Modified BSD License. See the COPYING file
6 | * for details.
7 | */
8 |
9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 |
13 | #include <file.h>
14 |
15 | /**
16 |  * Size in bytes of the buffer that file_copy uses to move data
17 |  * from the source file to the destination file.
18 |  */
19 | #define FILE_COPY_BUFFER_SIZE 4096
20 |
21 | /**
22 |  * Copies a file with stdio rather than by running "cp" through the
23 |  * shell, so spaces or shell metacharacters in a path cannot alter
24 |  * what is executed and a failed copy is reported to the caller.
25 |  *
26 |  * @param from_path The path to copy from.
27 |  * @param to_path The destination file; it is created or truncated.
28 |  *
29 |  * @return FILE_OK if the file was copied, FILE_ERROR otherwise.
30 |  */
31 | file_status_t
32 | file_copy(const char *from_path, const char *to_path)
33 | {
34 |     char buffer[ FILE_COPY_BUFFER_SIZE ];
35 |     size_t num_read;
36 |     file_status_t status = FILE_OK;
37 |     FILE *from_fp;
38 |     FILE *to_fp;
39 |
40 |     if( from_path == NULL || to_path == NULL )
41 |     {
42 |         return FILE_ERROR;
43 |     }
44 |     /* Opening the destination for writing would truncate the source. */
45 |     if( strcmp( from_path, to_path ) == 0 )
46 |     {
47 |         return FILE_ERROR;
48 |     }
49 |
50 |     from_fp = fopen( from_path, "rb" );
51 |     if( from_fp == NULL )
52 |     {
53 |         return FILE_ERROR;
54 |     }
55 |
56 |     to_fp = fopen( to_path, "wb" );
57 |     if( to_fp == NULL )
58 |     {
59 |         fclose( from_fp );
60 |         return FILE_ERROR;
61 |     }
62 |
63 |     while( ( num_read = fread( buffer, 1, sizeof( buffer ), from_fp ) ) > 0 )
64 |     {
65 |         if( fwrite( buffer, 1, num_read, to_fp ) != num_read )
66 |         {
67 |             status = FILE_ERROR;
68 |             break;
69 |         }
70 |     }
71 |     if( ferror( from_fp ) )
72 |     {
73 |         status = FILE_ERROR;
74 |     }
75 |
76 |     fclose( from_fp );
77 |     /* fclose flushes buffered output, so its result counts as well. */
78 |     if( fclose( to_fp ) != 0 )
79 |     {
80 |         status = FILE_ERROR;
81 |     }
82 |
83 |     return status;
84 | }
85 |
86 | /**
87 |  * Removes a file with remove() rather than by running "rm" through
88 |  * the shell; see file_copy for why the shell is avoided.
89 |  *
90 |  * @param path Path to the file to remove.
91 |  *
92 |  * @return FILE_OK if the file was removed, FILE_ERROR otherwise.
93 |  */
94 | file_status_t
95 | file_remove(const char *path)
96 | {
97 |     if( path == NULL )
98 |     {
99 |         return FILE_ERROR;
100 |     }
101 |
102 |     return ( remove( path ) == 0 ) ? FILE_OK : FILE_ERROR;
103 | }
104 |
--------------------------------------------------------------------------------
/src/libplinkio/file.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2012-2013, Mattias Frånberg
3 | * All rights reserved.
4 | *
5 | * This file is distributed under the Modified BSD License. See the COPYING file
6 | * for details.
7 | */
8 |
9 | #ifndef __FILE_H__
10 | #define __FILE_H__
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | /**
17 | * Defines return values from file operations.
18 | */
19 | enum file_status { /* No explicit values are given, so FILE_OK is 0 and FILE_ERROR is 1. */
20 | /**
21 | * Means that an operation was successfully performed.
22 | */
23 | FILE_OK,
24 |
25 | /**
26 | * Means that an error occurred.
27 | */
28 | FILE_ERROR
29 | };
30 |
31 | typedef enum file_status file_status_t;
32 |
33 | /**
34 | * Copies a file to another.
35 | *
36 | * @param from_path The path to copy from.
37 | * @param to_path The destination path.
38 | *
39 | * @return FILE_OK if the file was copied, FILE_ERROR otherwise.
40 | */
41 | file_status_t file_copy(const char *from_path, const char *to_path);
42 |
43 | /**
44 | * Removes the file of the given path
45 | *
46 | * @param path Path to the file to remove.
47 | *
48 | * @return FILE_OK if the file was removed, FILE_ERROR otherwise.
49 | */
50 | file_status_t file_remove(const char *path);
51 |
52 | #ifdef __cplusplus
53 | }
54 | #endif
55 |
56 | #endif /* __FILE_H__ */
57 |
--------------------------------------------------------------------------------
/src/libplinkio/snp_lookup.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2012-2013, Mattias Frånberg
3 | * All rights reserved.
4 | *
5 | * This file is distributed under the Modified BSD License. See the COPYING file
6 | * for details.
7 | */
8 |
9 | #ifndef __SNP_LOOKUP_H__
10 | #define __SNP_LOOKUP_H__
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | #if HAVE_ENDIAN_H
17 | #include
18 | #elif HAVE_MACHINE_ENDIAN_H
19 | #include
20 | #elif HAVE_SYS_ENDIAN_H
21 | #include
22 | #endif
23 |
24 | /**
25 | * This file contains a lookup table that maps
26 | * SNPs packed in a single byte into an array of
27 | * four bytes.
28 | */
29 | union snp_lookup_t /* Both members share the same storage: four bytes or one 32-bit integer. */
30 | {
31 | /**
32 | * Accessible as an array.
33 | */
34 | unsigned char snp_array[4];
35 |
36 | /**
37 | * Accessible as a block of bytes.
38 | */
39 | int32_t snp_block; /* How snp_array bytes map onto this value depends on byte order; see the endian test below. */
40 | };
41 |
42 | #if __BYTE_ORDER == __LITTLE_ENDIAN
43 | #include "snp_lookup_little.h"
44 | #else
45 | #include "snp_lookup_big.h"
46 | #endif /* End test endianess */
47 |
48 | #ifdef __cplusplus
49 | }
50 | #endif
51 |
52 | #endif /* End of __SNP_LOOKUP_H__ */
53 |
--------------------------------------------------------------------------------
/src/libplinkio/status.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2012-2013, Mattias Frånberg
3 | * All rights reserved.
4 | *
5 | * This file is distributed under the Modified BSD License. See the COPYING file
6 | * for details.
7 | */
8 |
9 | #ifndef __STATUS_H__
10 | #define __STATUS_H__
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | enum pio_status_e /* Values are implicit and count up from PIO_OK == 0. */
17 | {
18 | /**
19 | * Function successful.
20 | */
21 | PIO_OK,
22 |
23 | /**
24 | * File reached EOF.
25 | */
26 | PIO_END,
27 |
28 | /**
29 | * Generic error.
30 | */
31 | PIO_ERROR,
32 |
33 | /**
34 | * FAM IO error.
35 | */
36 | P_FAM_IO_ERROR,
37 |
38 | /**
39 | * BIM IO error.
40 | */
41 | P_BIM_IO_ERROR,
42 |
43 | /**
44 | * Bed IO error.
45 | */
46 | P_BED_IO_ERROR
47 | };
48 |
49 | typedef enum pio_status_e pio_status_t;
50 |
51 | #ifdef __cplusplus
52 | }
53 | #endif
54 |
55 | #endif /* End of __STATUS_H__ */
56 |
--------------------------------------------------------------------------------
/src/sqlite/py2/cache.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cache.c
--------------------------------------------------------------------------------
/src/sqlite/py2/cache.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cache.h
--------------------------------------------------------------------------------
/src/sqlite/py2/connection.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/connection.c
--------------------------------------------------------------------------------
/src/sqlite/py2/connection.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/connection.h
--------------------------------------------------------------------------------
/src/sqlite/py2/cursor.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cursor.c
--------------------------------------------------------------------------------
/src/sqlite/py2/cursor.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/cursor.h
--------------------------------------------------------------------------------
/src/sqlite/py2/module.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/module.c
--------------------------------------------------------------------------------
/src/sqlite/py2/module.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/module.h
--------------------------------------------------------------------------------
/src/sqlite/py2/prepare_protocol.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/prepare_protocol.c
--------------------------------------------------------------------------------
/src/sqlite/py2/prepare_protocol.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/prepare_protocol.h
--------------------------------------------------------------------------------
/src/sqlite/py2/row.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/row.c
--------------------------------------------------------------------------------
/src/sqlite/py2/row.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/row.h
--------------------------------------------------------------------------------
/src/sqlite/py2/sqlitecompat.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/sqlitecompat.h
--------------------------------------------------------------------------------
/src/sqlite/py2/statement.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/statement.c
--------------------------------------------------------------------------------
/src/sqlite/py2/statement.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/statement.h
--------------------------------------------------------------------------------
/src/sqlite/py2/util.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/util.c
--------------------------------------------------------------------------------
/src/sqlite/py2/util.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/src/sqlite/py2/util.h
--------------------------------------------------------------------------------
/src/sqlite/py3/prepare_protocol.h:
--------------------------------------------------------------------------------
1 | /* prepare_protocol.h - the protocol for preparing values for SQLite
2 | *
3 | * Copyright (C) 2005-2010 Gerhard Häring
4 | *
5 | * This file is part of pysqlite.
6 | *
7 | * This software is provided 'as-is', without any express or implied
8 | * warranty. In no event will the authors be held liable for any damages
9 | * arising from the use of this software.
10 | *
11 | * Permission is granted to anyone to use this software for any purpose,
12 | * including commercial applications, and to alter it and redistribute it
13 | * freely, subject to the following restrictions:
14 | *
15 | * 1. The origin of this software must not be misrepresented; you must not
16 | * claim that you wrote the original software. If you use this software
17 | * in a product, an acknowledgment in the product documentation would be
18 | * appreciated but is not required.
19 | * 2. Altered source versions must be plainly marked as such, and must not be
20 | * misrepresented as being the original software.
21 | * 3. This notice may not be removed or altered from any source distribution.
22 | */
23 |
24 | #ifndef PYSQLITE_PREPARE_PROTOCOL_H
25 | #define PYSQLITE_PREPARE_PROTOCOL_H
26 | #include "Python.h"
27 |
28 | typedef struct /* Python object layout with no fields beyond the standard object header. */
29 | {
30 | PyObject_HEAD
31 | } pysqlite_PrepareProtocol;
32 |
33 | extern PyTypeObject pysqlite_PrepareProtocolType;
34 |
35 | int pysqlite_prepare_protocol_init(pysqlite_PrepareProtocol* self, PyObject* args, PyObject* kwargs);
36 | void pysqlite_prepare_protocol_dealloc(pysqlite_PrepareProtocol* self);
37 |
38 | int pysqlite_prepare_protocol_setup_types(void);
39 |
40 | #define UNKNOWN (-1)
41 | #endif
42 |
--------------------------------------------------------------------------------
/src/sqlite/py3/row.h:
--------------------------------------------------------------------------------
1 | /* row.h - an enhanced tuple for database rows
2 | *
3 | * Copyright (C) 2005-2010 Gerhard Häring
4 | *
5 | * This file is part of pysqlite.
6 | *
7 | * This software is provided 'as-is', without any express or implied
8 | * warranty. In no event will the authors be held liable for any damages
9 | * arising from the use of this software.
10 | *
11 | * Permission is granted to anyone to use this software for any purpose,
12 | * including commercial applications, and to alter it and redistribute it
13 | * freely, subject to the following restrictions:
14 | *
15 | * 1. The origin of this software must not be misrepresented; you must not
16 | * claim that you wrote the original software. If you use this software
17 | * in a product, an acknowledgment in the product documentation would be
18 | * appreciated but is not required.
19 | * 2. Altered source versions must be plainly marked as such, and must not be
20 | * misrepresented as being the original software.
21 | * 3. This notice may not be removed or altered from any source distribution.
22 | */
23 |
24 | #ifndef PYSQLITE_ROW_H
25 | #define PYSQLITE_ROW_H
26 | #include "Python.h"
27 |
28 | typedef struct _Row /* Python object layout: standard header plus two object references. */
29 | {
30 | PyObject_HEAD
31 | PyObject* data; /* NOTE(review): presumably the row's column values — confirm in row.c */
32 | PyObject* description; /* NOTE(review): presumably the cursor description — confirm in row.c */
33 | } pysqlite_Row;
34 |
35 | extern PyTypeObject pysqlite_RowType;
36 |
37 | int pysqlite_row_setup_types(void);
38 |
39 | #endif
40 |
--------------------------------------------------------------------------------
/src/sqlite/py3/util.h:
--------------------------------------------------------------------------------
1 | /* util.h - various utility functions
2 | *
3 | * Copyright (C) 2005-2010 Gerhard Häring
4 | *
5 | * This file is part of pysqlite.
6 | *
7 | * This software is provided 'as-is', without any express or implied
8 | * warranty. In no event will the authors be held liable for any damages
9 | * arising from the use of this software.
10 | *
11 | * Permission is granted to anyone to use this software for any purpose,
12 | * including commercial applications, and to alter it and redistribute it
13 | * freely, subject to the following restrictions:
14 | *
15 | * 1. The origin of this software must not be misrepresented; you must not
16 | * claim that you wrote the original software. If you use this software
17 | * in a product, an acknowledgment in the product documentation would be
18 | * appreciated but is not required.
19 | * 2. Altered source versions must be plainly marked as such, and must not be
20 | * misrepresented as being the original software.
21 | * 3. This notice may not be removed or altered from any source distribution.
22 | */
23 |
24 | #ifndef PYSQLITE_UTIL_H
25 | #define PYSQLITE_UTIL_H
26 | #include "Python.h"
27 | #include "pythread.h"
28 | #include "sqlite3.h"
29 | #include "connection.h"
30 |
31 | int pysqlite_step(sqlite3_stmt* statement, pysqlite_Connection* connection);
32 |
33 | /**
34 | * Checks the SQLite error code and sets the appropriate DB-API exception.
35 | * Returns the error code (0 means no error occurred).
36 | */
37 | int _pysqlite_seterror(sqlite3* db, sqlite3_stmt* st);
38 | #endif
39 |
--------------------------------------------------------------------------------
/src/ucsc/inc/aliType.h:
--------------------------------------------------------------------------------
1 | /* aliType - some definitions for type of alignment. */
2 |
3 | #ifndef ALITYPE_H
4 | #define ALITYPE_H
5 |
6 | enum gfType
7 | /* Types of sequence genoFind deals with. */
8 | {
9 | gftDna = 0, /* DNA (genomic) */
10 | gftRna = 1, /* RNA */
11 | gftProt = 2, /* Protein. */
12 | gftDnaX = 3, /* Genomic DNA translated to protein */
13 | gftRnaX = 4, /* RNA translated to protein */
14 | }; /* The trailing comma after the last enumerator needs C99 or later. */
15 |
16 | char *gfTypeName(enum gfType type);
17 | /* Return string representing type. */
18 |
19 | enum gfType gfTypeFromName(char *name);
20 | /* Return type from string. */
21 |
22 | enum ffStringency
23 | /* How tight of a match is required. */
24 | {
25 | ffExact = 0, /* Only an exact match will do. */
26 |
27 | ffCdna = 1, /* Near exact. Tolerate long gaps in target (genomic) */
28 | ffTight = 2, /* Near exact. Not so tolerant of long gaps in target. */
29 | ffLoose = 3, /* Less exact. */
30 | }; /* The trailing comma after the last enumerator needs C99 or later. */
31 |
32 | #endif /* ALITYPE_H */
33 |
--------------------------------------------------------------------------------
/src/ucsc/inc/base64.h:
--------------------------------------------------------------------------------
1 | /* Base64 encoding and decoding.
2 | * by Galt Barber */
3 |
4 | #ifndef BASE64_H
5 | #define BASE64_H
6 |
7 | #define B64CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
8 |
9 | char *base64Encode(char *input, size_t inplen);
10 | /* Use base64 to encode a string. Returns one long encoded
11 | * string which needs to be freeMem'd. Note: big-endian algorithm.
12 | * For some applications you may need to break the base64 output
13 | * of this function into lines no longer than 76 chars.
14 | */
15 |
16 | boolean base64Validate(char *input);
17 | /* Return true if input is valid base64.
18 | * Note that the input string is changed by
19 | * eraseWhiteSpace(). */
20 |
21 | char *base64Decode(char *input, size_t *returnSize);
22 | /* Use base64 to decode a string. Return decoded
23 | * string which will be freeMem'd. Note: big-endian algorithm.
24 | * Call eraseWhiteSpace() and check for invalid input
25 | * before passing in input if needed.
26 | * Optionally set return size for use with binary data.
27 | */
28 |
29 | #endif /* BASE64_H */
30 |
--------------------------------------------------------------------------------
/src/ucsc/inc/filePath.h:
--------------------------------------------------------------------------------
1 | /* filePath - stuff to handle file name parsing. */
2 | #ifndef FILEPATH_H
3 | #define FILEPATH_H
4 |
5 | #include "common.h"
6 |
7 | void splitPath(char *path, char dir[PATH_LEN], char name[FILENAME_LEN],
8 | char extension[FILEEXT_LEN]);
9 | /* Split a full path into components. The dir component will include the
10 | * trailing / if any. The extension component will include the starting
11 | * . if any. Pass in NULL for dir, name, or extension if you don't care about
12 | * that part. */
13 |
14 | char *expandRelativePath(char *baseDir, char *relPath);
15 | /* Expand relative path to more absolute one. */
16 |
17 | char *pathRelativeToFile(char *baseFile, char *relPath);
18 | /* Given a base file name and a path relative to that, return
19 | * relative path interpreted as if it were seen from the
20 | * same directory holding the baseFile.
21 | * An example of using this would be in processing include
22 | * files. In this case the baseFile would be the current
23 | * source file, and the relPath would be from the include
24 | * statement. The returned result could then be used to
25 | * open the include file. */
26 |
27 | void undosPath(char *path);
28 | /* Convert '\' to '/' in path. (DOS/Windows is typically ok with
29 | * this actually.) */
30 |
31 | #endif /* FILEPATH_H */
32 |
--------------------------------------------------------------------------------
/src/ucsc/inc/gfxPoly.h:
--------------------------------------------------------------------------------
1 | /* gfxPoly - two dimensional polygon. */
2 |
3 | #ifndef GFXPOLY_H
4 | #define GFXPOLY_H
5 |
6 | struct gfxPoint
7 | /* A two-dimensional point, typically in pixel coordinates. */
8 | {
9 | struct gfxPoint *next; /* Next point; gfxPoly.ptList describes the point list as circular. */
10 | int x, y; /* Position */
11 | };
12 |
13 | struct gfxPoly
14 | /* A two-dimensional polygon */
15 | {
16 | struct gfxPoly *next; /* NOTE(review): presumably links polygons into a list — confirm with callers. */
17 | int ptCount; /* Number of points. */
18 | struct gfxPoint *ptList; /* First point in list, which is circular. */
19 | struct gfxPoint *lastPoint; /* Last point in list. */
20 | };
21 |
22 | struct gfxPoly *gfxPolyNew();
23 | /* Create new (empty) polygon */
24 |
25 | void gfxPolyFree(struct gfxPoly **pPoly);
26 | /* Free up resources associated with polygon */
27 |
28 | void gfxPolyAddPoint(struct gfxPoly *poly, int x, int y);
29 | /* Add point to polygon. */
30 |
31 | #endif /* GFXPOLY_H */
32 |
--------------------------------------------------------------------------------
/src/ucsc/inc/hmmstats.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used *
3 | * for personal, academic, and non-profit purposes. Commercial use *
4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) *
5 | *****************************************************************************/
6 | /* hmmstats.h - Stuff for doing statistical analysis in general and
7 | * hidden Markov models in particular. */
8 | #ifndef HMMSTATS_H
9 | #define HMMSTATS_H
10 |
11 | int scaledLog(double val);
12 | /* Return scaled log of val. */
13 |
14 | #define logScaleFactor 1000
15 | /* Amount we scale logs by. */
16 |
17 | double simpleGaussean(double x);
18 | /* Gaussean distribution with standard deviation 1 and mean 0. */
19 |
20 | double gaussean(double x, double mean, double sd);
21 | /* Gaussean distribution with mean and standard deviation at point x */
22 |
23 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n);
24 | /* Calculate variance. */
25 |
26 | double calcStdFromSums(double sum, double sumSquares, bits64 n);
27 | /* Calculate standard deviation. */
28 |
29 | #endif /* HMMSTATS_H */
30 |
--------------------------------------------------------------------------------
/src/ucsc/inc/https.h:
--------------------------------------------------------------------------------
1 | /* Connect via https. */
2 |
3 | #ifndef NET_HTTPS_H
4 | #define NET_HTTPS_H
5 |
6 | int netConnectHttps(char *hostName, int port);
7 | /* Return socket for https connection with server or -1 if error. */
8 |
9 | #endif//ndef NET_HTTPS_H
10 |
--------------------------------------------------------------------------------
/src/ucsc/inc/internet.h:
--------------------------------------------------------------------------------
1 | /* internet - some stuff for routines that use the internet
2 | * and aren't afraid to include some internet specific structures
3 | * and the like. See also net for stuff that is higher level. */
4 |
5 | #ifndef INTERNET_H
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | bits32 internetHostIp(char *hostName);
12 | /* Get IP v4 address (in host byte order) for hostName.
13 | * Warn and return 0 if there's a problem. */
14 |
15 | boolean internetFillInAddress(char *hostName, int port,
16 | struct sockaddr_in *address);
17 | /* Fill in address. Warn and return FALSE if can't. */
18 |
19 | boolean internetIpToDottedQuad(bits32 ip, char dottedQuad[17]);
20 | /* Convert IP4 address in host byte order to dotted quad
21 | * notation. Warn and return FALSE if there's a
22 | * problem. */
23 |
24 | boolean internetDottedQuadToIp(char *dottedQuad, bits32 *retIp);
25 | /* Convert dotted quad format address to IP4 address in
26 | * host byte order. Warn and return FALSE if there's a
27 | * problem. */
28 |
29 | boolean internetIsDottedQuad(char *s);
30 | /* Returns TRUE if it looks like s is a dotted quad. */
31 |
32 | void internetParseDottedQuad(char *dottedQuad, unsigned char quad[4]);
33 | /* Parse dotted quads into quad */
34 |
35 | void internetUnpackIp(bits32 packed, unsigned char unpacked[4]);
36 | /* Convert from 32 bit to 4-byte format with most significant
37 | * byte first. */
38 |
39 | boolean internetIpInSubnet(unsigned char unpackedIp[4],
40 | unsigned char subnet[4]);
41 | /* Return true if unpacked IP address is in subnet. */
42 |
43 | #endif /* INTERNET_H */
44 |
--------------------------------------------------------------------------------
/src/ucsc/inc/memalloc.h:
--------------------------------------------------------------------------------
1 | /* Let the user redirect where memory allocation/deallocation
2 | * happens. 'careful' routines help debug scrambled heaps.
3 | *
4 | * This file is copyright 2002 Jim Kent, but license is hereby
5 | * granted for all use - public, private or commercial. */
6 |
7 | #ifndef MEMALLOC_H
8 | #define MEMALLOC_H
9 |
10 | struct memHandler /* Set of allocator callbacks installed with pushMemHandler(). */
11 | {
12 | struct memHandler *next; /* Link to another handler; presumably the one beneath it on the handler stack -- confirm in memalloc.c. */
13 | void * (*alloc)(size_t size); /* Callback used to obtain a block of the given size. */
14 | void (*free)(void *vpt); /* Callback used to release a block. */
15 | void * (*realloc)(void* vpt, size_t size); /* Callback used to resize a block to the given size. */
16 | };
17 |
18 | struct memHandler *pushMemHandler(struct memHandler *newHandler);
19 | /* Use newHandler for memory requests until matching popMemHandler.
20 | * Returns previous top of memory handler stack. */
21 |
22 | struct memHandler *popMemHandler();
23 | /* Removes top element from memHandler stack and returns it. */
24 |
25 | void setDefaultMemHandler();
26 | /* Sets memHandler to the default. */
27 |
28 | void pushCarefulMemHandler(size_t maxAlloc);
29 | /* Push the careful (paranoid, conservative, checks everything)
30 | * memory handler top of the memHandler stack and use it. */
31 |
32 | void carefulCheckHeap();
33 | /* Walk through allocated memory and make sure that all cookies are
34 | * in place. Only walks through what's been done since
35 | * pushCarefulMemHandler(). */
36 |
37 | int carefulCountBlocksAllocated();
38 | /* How many memory items are allocated? (Since
39 | * pushCarefulMemHandler() was called.) */
40 |
41 | size_t carefulTotalAllocated();
42 | /* Return total bytes allocated */
43 |
44 | void setMaxAlloc(size_t s);
45 | /* Set large allocation limit. */
46 |
47 | void memTrackerStart();
48 | /* Push memory handler that will track blocks allocated so that
49 | * they can be automatically released with memTrackerEnd(). */
50 |
51 | void memTrackerEnd();
52 | /* Free any remaining blocks and pop tracker memory handler. */
53 |
54 | #endif /* MEMALLOC_H */
55 |
--------------------------------------------------------------------------------
/src/ucsc/inc/regexHelper.h:
--------------------------------------------------------------------------------
1 | /* regexHelper: easy wrappers on POSIX Extended Regular Expressions (man 7 regex, man 3 regex) */
2 |
3 | #ifndef REGEXHELPER_H
4 | #define REGEXHELPER_H
5 |
6 | #include "common.h"
7 | #include
8 |
9 | const regex_t *regexCompile(const char *exp, const char *description, int compileFlags);
10 | /* Compile exp (or die with an informative-as-possible error message).
11 | * Cache pre-compiled regex's internally (so don't free result after use). */
12 |
13 | boolean regexMatch(const char *string, const char *exp);
14 | /* Return TRUE if string matches regular expression exp (case sensitive). */
15 |
16 | boolean regexMatchNoCase(const char *string, const char *exp);
17 | /* Return TRUE if string matches regular expression exp (case insensitive). */
18 |
19 | boolean regexMatchSubstr(const char *string, const char *exp,
20 | regmatch_t substrArr[], size_t substrArrSize);
21 | /* Return TRUE if string matches regular expression exp (case sensitive);
22 | * regexec fills in substrArr with substring offsets. */
23 |
24 | boolean regexMatchSubstrNoCase(const char *string, const char *exp,
25 | regmatch_t substrArr[], size_t substrArrSize);
26 | /* Return TRUE if string matches regular expression exp (case insensitive);
27 | * regexec fills in substrArr with substring offsets. */
28 |
29 | #endif // REGEXHELPER_H
30 |
--------------------------------------------------------------------------------
/src/ucsc/inc/verbose.h:
--------------------------------------------------------------------------------
1 | /* verbose.h - write out status messages according to the
2 | * current verbosity level. These messages go to stderr. */
3 |
4 | #ifndef VERBOSE_H
5 | #define VERBOSE_H
6 |
7 | void verbose(int verbosity, char *format, ...)
8 | /* Write printf formatted message to log (which by
9 | * default is stderr) if global verbose variable
10 | * is set to verbosity or higher. */
11 | #if defined(__GNUC__)
12 | __attribute__((format(printf, 2, 3)))
13 | #endif
14 | ;
15 |
16 | void verboseVa(int verbosity, char *format, va_list args);
17 | /* Log vprintf formatted args at the given verbosity. */
18 |
19 | void verboseTimeInit(void);
20 | /* Initialize or reinitialize the previous time for use by verboseTime. */
21 |
22 | void verboseTime(int verbosity, char *label, ...)
23 | /* Print label and how long it's been since last call. Start time can be
24 | * initialized with verboseTimeInit, otherwise the elapsed time will be
25 | * zero. */
26 | #if defined(__GNUC__)
27 | __attribute__((format(printf, 2, 3)))
28 | #endif
29 | ;
30 |
31 | void verboseDot();
32 | /* Write I'm alive dot (at verbosity level 1) */
33 |
34 | boolean verboseDotsEnabled();
35 | /* Check whether output of happy dots is enabled. They will be enabled if the
36 | * verbosity is > 0, stderr is a tty and we don't appear to be running an
37 | * emacs shell. */
38 |
39 | int verboseLevel(void);
40 | /* Get verbosity level. */
41 |
42 | void verboseSetLevel(int verbosity);
43 | /* Set verbosity level in log. 0 for no logging,
44 | * higher number for increasing verbosity. */
45 |
46 | void verboseSetLogFile(char *name);
47 | /* Set log file for verbose messages; this overrides stderr. */
48 |
49 | FILE *verboseLogFile();
50 | /* Get the verbose log file. */
51 |
52 | #endif /* VERBOSE_H */
53 |
--------------------------------------------------------------------------------
/src/ucsc/inc/zlibFace.h:
--------------------------------------------------------------------------------
1 | /* Wrappers around zlib to make interfacing to it a bit easier. */
2 |
3 | #ifndef ZLIBFACE_H
4 | #define ZLIBFACE_H
5 |
6 | size_t zCompress(
7 | void *uncompressed, /* Start of area to compress. */
8 | size_t uncompressedSize, /* Size of area to compress. */
9 | void *compBuf, /* Where to put compressed bits */
10 | size_t compBufSize); /* Size of compressed bits - calculate using zCompBufSize */
11 | /* Compress data from memory to memory. Returns size after compression. */
12 |
13 | size_t zCompBufSize(size_t uncompressedSize);
14 | /* Return size of buffer needed to compress something of given size uncompressed. */
15 |
16 | size_t zUncompress(
17 | void *compressed, /* Compressed area */
18 | size_t compressedSize, /* Size after compression */
19 | void *uncompBuf, /* Where to put uncompressed bits */
20 | size_t uncompBufSize); /* Max size of uncompressed bits. */
21 | /* Uncompress data from memory to memory. Returns size after decompression. */
22 |
23 | void zSelfTest(int count);
24 | /* Run an internal diagnostic. */
25 |
26 | #endif /* ZLIBFACE_H */
27 |
--------------------------------------------------------------------------------
/src/ucsc/lib/aliType.c:
--------------------------------------------------------------------------------
1 | /* aliType - some definitions for type of alignment. */
2 | #include "common.h"
3 | #include "aliType.h"
4 |
5 |
6 | char *gfTypeName(enum gfType type)
7 | /* Return the printable name for an alignment sequence type.
8 |  * A type not listed here is reported via internalErr(). */
9 | {
10 | switch (type)
11 |     {
12 |     case gftDna:
13 |         return "DNA";
14 |     case gftRna:
15 |         return "RNA";
16 |     case gftProt:
17 |         return "protein";
18 |     case gftDnaX:
19 |         return "DNAX";
20 |     case gftRnaX:
21 |         return "RNAX";
22 |     default:
23 |         break;
24 |     }
25 | internalErr();
26 | return NULL;
27 | }
28 |
29 | enum gfType gfTypeFromName(char *name)
30 | /* Return the sequence type whose name matches the string (compared with
31 |  * sameWord).  Both "protein" and "prot" select gftProt.  Calls errAbort
32 |  * when nothing matches. */
33 | {
34 | static struct {char *name; enum gfType type;} known[] =
35 |     {
36 |     {"DNA", gftDna},
37 |     {"RNA", gftRna},
38 |     {"protein", gftProt},
39 |     {"prot", gftProt},
40 |     {"DNAX", gftDnaX},
41 |     {"RNAX", gftRnaX},
42 |     };
43 | int i;
44 | for (i = 0; i < (int)(sizeof(known)/sizeof(known[0])); ++i)
45 |     {
46 |     if (sameWord(name, known[i].name))
47 |         return known[i].type;
48 |     }
49 | errAbort("Unknown sequence type '%s'", name);
50 | return 0;
51 | }
52 |
--------------------------------------------------------------------------------
/src/ucsc/lib/hmmstats.c:
--------------------------------------------------------------------------------
1 | /* hmmstats.c - Stuff for doing statistical analysis in general and
2 | * hidden Markov models in particular.
3 | *
4 | * This file is copyright 2002 Jim Kent, but license is hereby
5 | * granted for all use - public, private or commercial. */
6 |
7 | #include "common.h"
8 | #include "hmmstats.h"
9 |
10 |
11 | int scaledLog(double val)
12 | /* Return log(val) multiplied by logScaleFactor, passed through round(). */
13 | {
14 | return round(log(val) * logScaleFactor);
15 | }
16 |
17 | double oneOverSqrtTwoPi = 0.39894228; /* 1/sqrt(2*pi), truncated to eight decimals */
18 |
19 | double simpleGaussean(double x)
20 | /* Density at x of the Gaussian with mean 0 and standard deviation 1. */
21 | {
22 | return exp(-0.5*x*x) * oneOverSqrtTwoPi;
23 | }
24 |
25 | double gaussean(double x, double mean, double sd)
26 | /* Density at x of the Gaussian with the given mean and standard deviation. */
27 | {
28 | double z = (x - mean) / sd;
29 | double unitDensity = oneOverSqrtTwoPi * exp(-0.5*z*z);
30 | return unitDensity / sd;
31 | }
32 |
33 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n)
34 | /* Variance from running sums: sumSquares - sum*sum/n, divided by n-1 when n > 1. */
35 | {
36 | double var = sumSquares - sum*sum/n;
37 | if (n <= 1)
38 |     return var;
39 | return var / (n-1);
40 | }
41 |
42 | double calcStdFromSums(double sum, double sumSquares, bits64 n)
43 | /* Calculate standard deviation: the square root of calcVarianceFromSums(). */
44 | {
45 | return sqrt(calcVarianceFromSums(sum, sumSquares, n));
46 | }
47 |
--------------------------------------------------------------------------------
/src/ucsc/lib/portimpl.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used *
3 | * for personal, academic, and non-profit purposes. Commercial use *
4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) *
5 | *****************************************************************************/
6 | /* Implement portable stuff.... */
7 |
8 | /* There is one of the following structures for each web server
9 | * we support. During run time looking at the environment variable
10 | * SERVER_SOFTWARE we decide which of these to use. */
11 | struct webServerSpecific
12 | {
13 | char *name;
14 |
15 | /* Make a good name for a temp file. */
16 | void (*makeTempName)(struct tempName *tn, char *base, char *suffix);
17 |
18 | /* Return directory to look for cgi in. */
19 | char * (*cgiDir)();
20 |
21 | #ifdef NEVER
22 | /* Return cgi suffix. */
23 | char * (*cgiSuffix)();
24 | #endif /* NEVER */
25 |
26 | /* Return relative speed of CPU. (UCSC CSE 1999 FTP machine is 1.0) */
27 | double (*speed)();
28 |
29 | /* The relative path to trash directory for CGI binaries */
30 | char * (*trashDir)();
31 |
32 | };
33 |
34 |
35 | extern struct webServerSpecific wssMicrosoftII, wssMicrosoftPWS, wssDefault,
36 | wssLinux, wssCommandLine, wssBrcMcw;
37 |
38 | char *rTempName(char *dir, char *base, char *suffix);
39 | /* Make a temp name that's almost certainly unique. */
40 |
--------------------------------------------------------------------------------
/src/ucsc/lib/servBrcMcw.c:
--------------------------------------------------------------------------------
1 | /* Stuff that's specific for .brc.mcw.edu server goes here.
2 | *
3 | * This file is copyright 2004 Jim Kent, but license is hereby
4 | * granted for all use - public, private or commercial. */
5 |
6 | #include "common.h"
7 | #include "portable.h"
8 | #include "portimpl.h"
9 | #include "obscure.h"
10 | #include "hash.h"
11 |
12 |
13 | static char *__trashDir = "/trash";
14 |
15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix)
16 | /* Ask rTempName() for a name under __trashDir and record that same
17 |  * path for both CGI (tn->forCgi) and HTML (tn->forHtml) access. */
18 | {
19 | char *tname = rTempName(__trashDir, base, suffix);
20 | strcpy(tn->forCgi, tname);
21 | strcpy(tn->forHtml, tname);
22 | }
23 |
24 | static char *_cgiDir()
25 | /* Directory to look for cgi in. */
26 | {
27 | return "/cgi-bin/";
28 | }
29 |
30 | static double _speed()
31 | /* Relative CPU speed reported for this server. */
32 | {
33 | return 3.0;
34 | }
35 |
36 | static char *_trashDir()
37 | /* Path of the trash directory. */
38 | {
39 | return __trashDir;
40 | }
41 |
42 | /* Entries are positional and follow the member order of
43 |  * struct webServerSpecific in portimpl.h.
44 |  * NOTE(review): the name string is "default", same as wssDefault -- confirm intended. */
45 | struct webServerSpecific wssBrcMcw =
46 |     {
47 |     "default",        /* name */
48 |     _makeTempName,    /* makeTempName */
49 |     _cgiDir,          /* cgiDir */
50 |     _speed,           /* speed */
51 |     _trashDir,        /* trashDir */
52 |     };
53 |
--------------------------------------------------------------------------------
/src/ucsc/lib/servCrunx.c:
--------------------------------------------------------------------------------
1 | /* Stuff that's specific for local linux server goes here.
2 | *
3 | * This file is copyright 2002 Jim Kent, but license is hereby
4 | * granted for all use - public, private or commercial. */
5 |
6 | #include "common.h"
7 | #include "portable.h"
8 | #include "portimpl.h"
9 | #include "obscure.h"
10 |
11 |
12 | static char *__trashDir = "/home/httpd/html/trash";
13 |
14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix)
15 | /* Generate a temp name under __trashDir for CGI use, and derive the HTML
16 |  * path by swapping the leading __trashDir for "/trash". */
17 | {
18 | char *htmlDir = "/trash";
19 | int cgiDirLen = strlen(__trashDir);
20 | int htmlDirLen = strlen(htmlDir);
21 | char *tname = rTempName(__trashDir, base, suffix);
22 |
23 | strcpy(tn->forCgi, tname);
24 | /* forHtml = htmlDir followed by the part of forCgi past the first cgiDirLen chars. */
25 | memcpy(tn->forHtml, htmlDir, htmlDirLen);
26 | strcpy(tn->forHtml + htmlDirLen, tn->forCgi + cgiDirLen);
27 | }
28 |
29 | static char *_cgiDir()
30 | /* Directory to look for cgi in. */
31 | {
32 | return "../cgi-bin/";
33 | }
34 |
35 | static double _speed()
36 | /* Relative CPU speed reported for this server. */
37 | {
38 | return 3.0;
39 | }
40 |
41 | static char *_trashDir()
42 | /* Path of the trash directory. */
43 | {
44 | return __trashDir;
45 | }
46 |
47 | /* Entries are positional and follow the member order of
48 |  * struct webServerSpecific in portimpl.h. */
49 | struct webServerSpecific wssLinux =
50 |     {
51 |     "linux",          /* name */
52 |     _makeTempName,    /* makeTempName */
53 |     _cgiDir,          /* cgiDir */
54 |     _speed,           /* speed */
55 |     _trashDir,        /* trashDir */
56 |     };
57 |
--------------------------------------------------------------------------------
/src/ucsc/lib/servcis.c:
--------------------------------------------------------------------------------
1 | /* Stuff that's specific for Comp Science dept. web server goes here.
2 | *
3 | * This file is copyright 2002 Jim Kent, but license is hereby
4 | * granted for all use - public, private or commercial. */
5 |
6 | #include "common.h"
7 | #include "portable.h"
8 | #include "portimpl.h"
9 | #include "obscure.h"
10 | #include "hash.h"
11 |
12 |
13 | static char *__trashDir = "../trash";
14 |
15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix)
16 | /* Ask rTempName() for a name under __trashDir and record that same
17 |  * path for both CGI (tn->forCgi) and HTML (tn->forHtml) access. */
18 | {
19 | char *tname = rTempName(__trashDir, base, suffix);
20 | strcpy(tn->forCgi, tname);
21 | strcpy(tn->forHtml, tname);
22 | }
23 |
24 | static char *_cgiDir()
25 | /* Directory to look for cgi in. */
26 | {
27 | return "../cgi-bin/";
28 | }
29 |
30 | static double _speed()
31 | /* Relative CPU speed reported for this server. */
32 | {
33 | return 3.0;
34 | }
35 |
36 | static char *_trashDir()
37 | /* Path of the trash directory. */
38 | {
39 | return __trashDir;
40 | }
41 |
42 | /* Entries are positional and follow the member order of
43 |  * struct webServerSpecific in portimpl.h. */
44 | struct webServerSpecific wssDefault =
45 |     {
46 |     "default",        /* name */
47 |     _makeTempName,    /* makeTempName */
48 |     _cgiDir,          /* cgiDir */
49 |     _speed,           /* speed */
50 |     _trashDir,        /* trashDir */
51 |     };
52 |
--------------------------------------------------------------------------------
/src/ucsc/lib/servcl.c:
--------------------------------------------------------------------------------
1 | /* "Web Server" for command line execution.
2 | *
3 | * This file is copyright 2002 Jim Kent, but license is hereby
4 | * granted for all use - public, private or commercial. */
5 |
6 | #include "common.h"
7 | #include "portable.h"
8 | #include "portimpl.h"
9 | #include "obscure.h"
10 |
11 |
12 | static char *__trashDir = ".";
13 |
14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix)
15 | /* Ask rTempName() for a name under __trashDir; the CGI and HTML
16 |  * paths are the same string. */
17 | {
18 | char *tname = rTempName(__trashDir, base, suffix);
19 |
20 | strcpy(tn->forCgi, tname);
21 | strcpy(tn->forHtml, tn->forCgi);
22 | }
23 |
24 | static char *_cgiDir()
25 | /* Value of the JKWEB environment variable, or "" when it is not set. */
26 | {
27 | char *jkwebDir = getenv("JKWEB");
28 | return (jkwebDir != NULL) ? jkwebDir : "";
29 | }
30 |
31 | static double _speed()
32 | /* Relative CPU speed reported for command line use. */
33 | {
34 | return 1.0;
35 | }
36 |
37 | static char *_trashDir()
38 | /* Path of the trash directory. */
39 | {
40 | return __trashDir;
41 | }
42 |
43 | /* Entries are positional and follow the member order of
44 |  * struct webServerSpecific in portimpl.h. */
45 | struct webServerSpecific wssCommandLine =
46 |     {
47 |     "commandLine",    /* name */
48 |     _makeTempName,    /* makeTempName */
49 |     _cgiDir,          /* cgiDir */
50 |     _speed,           /* speed */
51 |     _trashDir,        /* trashDir */
52 |     };
53 |
--------------------------------------------------------------------------------
/src/ucsc/lib/servmsII.c:
--------------------------------------------------------------------------------
1 | /* Stuff that's specific for the MS II Web Server goes here.
2 | *
3 | * This file is copyright 2002 Jim Kent, but license is hereby
4 | * granted for all use - public, private or commercial. */
5 |
6 | #include "common.h"
7 | #include "portable.h"
8 | #include "portimpl.h"
9 | #include "obscure.h"
10 |
11 |
12 | static char *__trashDir = "..\\trash";
13 |
14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix)
15 | /* Build <__trashDir>\<base><counter><suffix>, where counter comes from
16 |  * incCounterFile("tcounter"); CGI and HTML get the same path. */
17 | {
18 | long tempIx = incCounterFile("tcounter");
19 |
20 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix);
21 | strcpy(tn->forHtml, tn->forCgi);
22 | }
23 |
24 | static char *_cgiDir()
25 | /* Directory to look for cgi in (empty for this server). */
26 | {
27 | return "";
28 | }
29 |
30 | static double _speed()
31 | /* Relative CPU speed reported for this server. */
32 | {
33 | return 2.5;
34 | }
35 |
36 | static char *_trashDir()
37 | /* Path of the trash directory. */
38 | {
39 | return __trashDir;
40 | }
41 |
42 | /* Entries are positional and follow the member order of
43 |  * struct webServerSpecific in portimpl.h. */
44 | struct webServerSpecific wssMicrosoftII =
45 |     {
46 |     "Microsoft-IIS",  /* name */
47 |     _makeTempName,    /* makeTempName */
48 |     _cgiDir,          /* cgiDir */
49 |     _speed,           /* speed */
50 |     _trashDir,        /* trashDir */
51 |     };
52 |
--------------------------------------------------------------------------------
/src/ucsc/lib/servpws.c:
--------------------------------------------------------------------------------
1 | /* Stuff that's specific for the Personal Web Server goes here.
2 | *
3 | * This file is copyright 2002 Jim Kent, but license is hereby
4 | * granted for all use - public, private or commercial. */
5 |
6 | #include "common.h"
7 | #include "portable.h"
8 | #include "portimpl.h"
9 | #include "obscure.h"
10 |
11 |
12 | static char *__trashDir = "..\\trash";
13 |
14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix)
15 | /* Build <__trashDir>\<base><counter><suffix>, where counter comes from
16 |  * incCounterFile("tcounter"); CGI and HTML get the same path. */
17 | {
18 | long tempIx = incCounterFile("tcounter");
19 |
20 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix);
21 | strcpy(tn->forHtml, tn->forCgi);
22 | }
23 |
24 | static char *_cgiDir()
25 | /* Directory to look for cgi in. */
26 | {
27 | return "../cgi-bin/";
28 | }
29 |
30 | static double _speed()
31 | /* Relative CPU speed reported for this server. */
32 | {
33 | return 1.25;
34 | }
35 |
36 | static char *_trashDir()
37 | /* Path of the trash directory. */
38 | {
39 | return __trashDir;
40 | }
41 |
42 | /* Entries are positional and follow the member order of
43 |  * struct webServerSpecific in portimpl.h. */
44 | struct webServerSpecific wssMicrosoftPWS =
45 |     {
46 |     "Microsoft-PWS",  /* name */
47 |     _makeTempName,    /* makeTempName */
48 |     _cgiDir,          /* cgiDir */
49 |     _speed,           /* speed */
50 |     _trashDir,        /* trashDir */
51 |     };
52 |
--------------------------------------------------------------------------------
/src/ucsc/samtools/sam_header.h:
--------------------------------------------------------------------------------
1 | #ifndef __SAM_HEADER_H__
2 | #define __SAM_HEADER_H__
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | void *sam_header_parse2(const char *headerText);
9 | void *sam_header_merge(int n, const void **dicts);
10 | void sam_header_free(void *header);
11 | char *sam_header_write(const void *headerDict); // returns a newly allocated string
12 |
13 | /*
14 | // Usage example
15 | const char *key, *val;
16 | void *iter = sam_header_parse2(bam->header->text);
17 | while ( iter = sam_header2key_val(iter, "RG","ID","SM",&key,&val) ) printf("%s\t%s\n", key,val);
18 | */
19 | void *sam_header2key_val(void *iter, const char type[2], const char key_tag[2], const char value_tag[2], const char **key, const char **value);
20 | char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n);
21 |
22 | /*
23 | // Usage example
24 | int i, j, n;
25 | const char *tags[] = {"SN","LN","UR","M5",NULL};
26 | void *dict = sam_header_parse2(bam->header->text);
27 | char **tbl = sam_header2tbl_n(h->dict, "SQ", tags, &n);
28 | for (i=0; i
5 |
6 | static inline int bam_is_big_endian()
7 | {
8 |     const long probe = 1; /* its lowest-addressed byte is 0 only on a big-endian host */
9 |     return *(const char *)&probe == 0;
10 | }
11 | static inline uint16_t bam_swap_endian_2(uint16_t v)
12 | {
13 |     return (uint16_t)((v << 8) | (v >> 8)); /* exchange the two bytes; the cast drops bits above bit 15 */
14 | }
15 | static inline void *bam_swap_endian_2p(void *x)
16 | { /* Byte-swap, in place, the uint16_t that x points to; returns x. */
17 | *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x);
18 | return x;
19 | }
20 | static inline uint32_t bam_swap_endian_4(uint32_t v)
21 | {
22 |     v = (v << 16) | (v >> 16); /* swap the 16-bit halves */
23 |     return ((v << 8) & 0xFF00FF00U) | ((v >> 8) & 0x00FF00FFU); /* then the bytes within each half */
24 | }
25 | static inline void *bam_swap_endian_4p(void *x)
26 | { /* Byte-swap, in place, the uint32_t that x points to; returns x. */
27 | *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x);
28 | return x;
29 | }
30 | static inline uint64_t bam_swap_endian_8(uint64_t v)
31 | {
32 |     v = (v << 32) | (v >> 32); /* swap the 32-bit halves */
33 |     v = ((v << 16) & 0xFFFF0000FFFF0000ULL) | ((v >> 16) & 0x0000FFFF0000FFFFULL); /* then 16-bit pairs */
34 |     return ((v << 8) & 0xFF00FF00FF00FF00ULL) | ((v >> 8) & 0x00FF00FF00FF00FFULL); /* then adjacent bytes */
35 | }
36 | static inline void *bam_swap_endian_8p(void *x)
37 | { /* Byte-swap, in place, the uint64_t that x points to; returns x. */
38 | *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x);
39 | return x;
40 | }
41 |
42 | #endif
43 |
--------------------------------------------------------------------------------
/src/ucsc/tabix/knetfile.h:
--------------------------------------------------------------------------------
1 | #ifndef KNETFILE_H
2 | #define KNETFILE_H
3 |
4 | #include
5 | #include
6 |
7 | #ifndef _WIN32
8 | #define netread(fd, ptr, len) read(fd, ptr, len)
9 | #define netwrite(fd, ptr, len) write(fd, ptr, len)
10 | #define netclose(fd) close(fd)
11 | #else
12 | #include
13 | #define netread(fd, ptr, len) recv(fd, ptr, len, 0)
14 | #define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
15 | #define netclose(fd) closesocket(fd)
16 | #endif
17 |
18 | // FIXME: currently I/O is unbuffered
19 |
20 | #define KNF_TYPE_LOCAL 1
21 | #define KNF_TYPE_FTP 2
22 | #define KNF_TYPE_HTTP 3
23 |
24 | typedef struct knetFile_s {
25 | int type, fd;
26 | int64_t offset;
27 | char *host, *port;
28 |
29 | // the following are for FTP only
30 | int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
31 | char *response, *retr, *size_cmd;
32 | int64_t seek_offset; // for lazy seek
33 | int64_t file_size;
34 |
35 | // the following are for HTTP only
36 | char *path, *http_host;
37 | } knetFile;
38 |
39 | #define knet_tell(fp) ((fp)->offset)
40 | #define knet_fileno(fp) ((fp)->fd)
41 |
42 | #ifdef __cplusplus
43 | extern "C" {
44 | #endif
45 |
46 | #ifdef _WIN32
47 | int knet_win32_init();
48 | void knet_win32_destroy();
49 | #endif
50 |
51 | knetFile *knet_open(const char *fn, const char *mode);
52 |
53 | /*
54 | This only works with local files.
55 | */
56 | knetFile *knet_dopen(int fd, const char *mode);
57 |
58 | /*
59 | If ->is_ready==0, this routine updates ->fd; otherwise, it simply
60 | reads from ->fd.
61 | */
62 | off_t knet_read(knetFile *fp, void *buf, off_t len);
63 |
64 | /*
65 | This routine only sets ->offset and ->is_ready=0. It does not
66 | communicate with the FTP server.
67 | */
68 | off_t knet_seek(knetFile *fp, int64_t off, int whence);
69 | int knet_close(knetFile *fp);
70 |
71 | #ifdef __cplusplus
72 | }
73 | #endif
74 |
75 | #endif
76 |
--------------------------------------------------------------------------------
/src/ucsc/tabix/kstring.h:
--------------------------------------------------------------------------------
1 | #ifndef KSTRING_H
2 | #define KSTRING_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #ifndef kroundup32
9 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
10 | #endif
11 |
12 | #ifndef KSTRING_T
13 | #define KSTRING_T kstring_t
14 | typedef struct __kstring_t { /* Growable string buffer used by kputsn()/kputs()/kputc(). */
15 | size_t l, m; /* l: bytes currently stored in s (the kput* routines write a NUL at s[l]); m: size passed to realloc() for s */
16 | char *s; /* the character buffer */
17 | } kstring_t;
18 | #endif
19 |
20 | int ksprintf(kstring_t *s, const char *fmt, ...);
21 | int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
22 |
23 | // calculate the auxiliary array, allocated by calloc()
24 | int *ksBM_prep(const uint8_t *pat, int m);
25 |
26 | /* Search for pat in str and return the list of matches. The size of the
27 | * list is returned as n_matches. _prep is the array returned by
28 | * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */
29 | int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches);
30 |
31 | static inline int kputsn(const char *p, int l, kstring_t *s)
32 | {
33 |     if (s->m <= s->l + l + 1) { /* grow so that l more bytes plus the NUL fit */
34 |         s->m = s->l + l + 2;
35 |         kroundup32(s->m);
36 |         s->s = (char*)realloc(s->s, s->m);
37 |     }
38 |     strncpy(&s->s[s->l], p, l);
39 |     s->l += l;
40 |     s->s[s->l] = '\0';
41 |     return l;
42 | }
43 |
44 | static inline int kputs(const char *p, kstring_t *s)
45 | { /* Append the NUL-terminated string p to s via kputsn(); returns kputsn()'s result. */
46 | return kputsn(p, strlen(p), s);
47 | }
48 |
49 | static inline int kputc(int c, kstring_t *s)
50 | {
51 |     if (s->m <= s->l + 1) { /* grow so that one more byte plus the NUL fit */
52 |         s->m = s->l + 2;
53 |         kroundup32(s->m);
54 |         s->s = (char*)realloc(s->s, s->m);
55 |     }
56 |     s->s[s->l] = c;
57 |     s->s[++s->l] = '\0';
58 |     return c;
59 | }
60 |
61 | static inline int *ksplit(kstring_t *s, int delimiter, int *n)
62 | {
63 |     int capacity = 0, *fieldOffsets = 0; /* both are passed by address to ksplit_core() */
64 |     *n = ksplit_core(s->s, delimiter, &capacity, &fieldOffsets);
65 |     return fieldOffsets;
66 | }
67 |
68 | #endif
69 |
--------------------------------------------------------------------------------
/src/variant_tools/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # This file is part of variant_tools, a software application to annotate,
4 | # summarize, and filter variants for next-gen sequencing analysis.
5 | # Please visit https://github.com/vatlab/varianttools for details.
6 | #
7 | # Copyright (C) 2011 - 2020 - 2013 Bo Peng (bpeng@mdanderson.org)
8 | #
9 | # This program is free software: you can redistribute it and/or modify
10 | # it under the terms of the GNU General Public License as published by
11 | # the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # This program is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU General Public License
20 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 | #
22 |
--------------------------------------------------------------------------------
/src/variant_tools/_version.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # This file is part of variant_tools, a software application to annotate,
4 | # summarize, and filter variants for next-gen sequencing analysis.
5 | # Please visit https://github.com/vatlab/varianttools for details.
6 | #
7 | # Copyright (C) 2011 - 2020 - 2013 Bo Peng (bpeng@mdanderson.org)
8 | #
9 | # This program is free software: you can redistribute it and/or modify
10 | # it under the terms of the GNU General Public License as published by
11 | # the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # This program is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU General Public License
20 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 | #
22 |
23 | import sys
24 |
25 | VTOOLS_VERSION = '3.1.4'
26 |
27 | pyver = sys.version_info
28 | VTOOLS_FULL_VERSION = '{} for Python {}.{}.{}'.format(VTOOLS_VERSION,
29 | pyver.major, pyver.minor,
30 | pyver.micro)
31 | VTOOLS_COPYRIGHT = '''variant tools {} : Copyright (c) 2011 - 2016 Bo Peng'''.format(
32 | VTOOLS_VERSION)
33 | VTOOLS_CONTACT = '''Please visit https://github.com/vatlab/varianttools for more information.'''
34 |
--------------------------------------------------------------------------------
/src/variant_tools/checking_asso_result.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
def _load_association_results(path):
    """Read a whitespace-delimited result file into a dictionary.

    Only lines that split into exactly seven fields are kept. The first field
    is used as the key and the remaining six fields (as strings) as the value;
    a later line with the same key overwrites an earlier one.
    """
    results = {}
    with open(str(path)) as handle:
        for line in handle:
            fields = line.split()
            if len(fields) == 7:
                results[fields[0]] = fields[1:7]
    return results


def checking_association(sql_file, hdf5_file):
    """Compare two association result files and print every difference.

    Parameters:
        sql_file: path of the result file produced for sqlite.
        hdf5_file: path of the result file produced for hdf5.

    Both files are parsed with ``_load_association_results``. For every key
    whose six values differ, both versions are printed together with the
    ``refgene_name2`` row of the sql file (the column header row, when the
    file has one). Keys that exist in only one of the files are reported as
    differences as well, instead of raising ``KeyError`` or being ignored.
    A summary line with the number of differences is printed at the end.
    """
    result_sql = _load_association_results(sql_file)
    result_h5 = _load_association_results(hdf5_file)

    # The header row is only used to label the output; a file without one
    # must not abort the comparison.
    header = result_sql.get("refgene_name2", "")

    count = 0

    for key in result_h5:
        if key not in result_sql:
            count += 1
            print("%s is only present in hdf5: %s" % (key, result_h5[key]))
        elif result_h5[key] != result_sql[key]:
            count += 1
            print("Values in %s are different:\n" % (key),
                  " %s \n" % (header),
                  " in sql: %s \n in hdf5: %s" % (result_sql[key], result_h5[key]))

    # Records missing from the hdf5 file are differences too.
    for key in result_sql:
        if key not in result_h5:
            count += 1
            print("%s is only present in sql: %s" % (key, result_sql[key]))

    if count != 0:
        print("there are total %d different results." % count)
    else:
        print("All result are the same!")
29 |
if __name__ == "__main__":
    # Command-line entry point: both result files are mandatory, so a
    # forgotten option is reported by argparse instead of failing later
    # while trying to open a file literally named "None".
    parser = argparse.ArgumentParser(
        description="Compare two association result files, "
                    "one for sqlite and one for hdf5.")
    parser.add_argument("-sql", required=True,
                        help="path and name of the result file for sqlite")
    parser.add_argument("-h5", required=True,
                        help="path and name of the result file for hdf5")

    args = parser.parse_args()
    checking_association(args.sql, args.h5)
40 |
--------------------------------------------------------------------------------
/src/variant_tools/genotypes.h:
--------------------------------------------------------------------------------
/* NOTE(review): the #include below has no header name in this copy of the
 * file, so its intended target cannot be determined from here -- confirm
 * and restore it. */
#include
#ifndef _GENO_H
#define _GENO_H
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Declaration of get_Genotypes; it is given C linkage when this header is
 * compiled as C++, so the same symbol can be used from C and C++ code.
 *
 * Parameters, as declared:
 *   chr             - character string
 *   variant_id      - integer
 *   samples         - pointer to int
 *   numberOfSamples - integer
 *   genoFilter      - character string
 *   sample_IDs      - pointer to int
 *
 * NOTE(review): this header does not show which pointer arguments are
 * inputs and which are written by the function; presumably
 * numberOfSamples is the length of one of the int arrays -- verify
 * against the implementation.
 */
void get_Genotypes(char* chr, int variant_id,int* samples,int numberOfSamples, char* genoFilter, int* sample_IDs);
#ifdef __cplusplus
}
#endif
#endif
12 |
--------------------------------------------------------------------------------
/src/variant_tools/vtools_association_cluster.lsf:
--------------------------------------------------------------------------------
#!/bin/bash
#BSUB -W 1:00
#BSUB -J vtools_asociation_cluster
#BSUB -n 32
#BSUB -N
#BSUB -M 16384
#BSUB -R "span[ptile=8]"
#BSUB -q short

# LSF job script: starts the vtools command once on the host running this
# script and starts worker_run processes on the remaining hosts of the job
# through mpiexec.

# Use PROJECTFOLDER from the environment when it is set and non-empty;
# otherwise fall back to the placeholder, which must be edited.
PROJECTFOLDER="${PROJECTFOLDER:-path to project folder}"

# The command is kept in an array so that every argument reaches vtools as
# one word. With a plain string, the unquoted expansion split the --method
# value at each space and passed the double quotes as literal characters.
COMMAND=(vtools associate variant disease
         --discard_variants '%(NA)>0.1'
         --method 'BurdenBt --name BurdenTest --alternative 2'
         --group_by refgene.name2 -j 8 -v 2 -mpi)
NUMBER_OF_PROCESSES=24

# LSB_HOSTS has the name of all the nodes running the job.
NODE_LIST=($LSB_HOSTS)

# The main program runs on the node named $HOSTNAME; every other node is
# stored once in WORKER_NODES and written to hostlist.txt.
WORKER_NODES=()
for node in "${NODE_LIST[@]}"; do
    if [ "$node" != "$HOSTNAME" ]; then
        if [[ ! " ${WORKER_NODES[*]} " =~ " ${node} " ]]; then
            WORKER_NODES+=("$node")
        fi
    fi
done

export ZEROMQIP=$(hostname --ip-address)
export PROJECTFOLDER
HOSTFILE="$PROJECTFOLDER/hostlist.txt"

# Quoted on purpose: an unquoted path containing spaces would make rm
# delete each word of the path as a separate target.
rm -f -- "$HOSTFILE"
for node in "${WORKER_NODES[@]}"; do
    echo "$node slots=8" >> "$HOSTFILE"
done

# Get the path for mpiexec and stop early when it is not available.
MPIEXECPATH=$(command -v mpiexec)
if [ -z "$MPIEXECPATH" ]; then
    echo "mpiexec was not found in PATH" >&2
    exit 1
fi

# The IP address of the main node ($ZEROMQIP) and $PROJECTFOLDER are exported
# to the worker processes, which need them to communicate with the main node.
"$MPIEXECPATH" -d -x PATH -H "$HOSTNAME" -np 1 -wdir "$PROJECTFOLDER" "${COMMAND[@]}" : \
    -x ZEROMQIP -x PROJECTFOLDER -x PATH -hostfile "$HOSTFILE" -np "$NUMBER_OF_PROCESSES" worker_run
44 |
--------------------------------------------------------------------------------
/src/variant_tools/vtools_association_cluster.pbs:
--------------------------------------------------------------------------------
#!/bin/bash
#PBS -l nodes=4:lowmem:ppn=8,walltime=01:00:00
#PBS -V
#PBS -q short

# PBS job script: starts the vtools command once on the host running this
# script and starts worker_run processes on the remaining nodes of the job
# through mpiexec.

# Use PROJECTFOLDER from the environment when it is set and non-empty;
# otherwise fall back to the placeholder, which must be edited.
PROJECTFOLDER="${PROJECTFOLDER:-path to project folder}"

# The command is kept in an array so that every argument reaches vtools as
# one word. With a plain string, the unquoted expansion split the --method
# value at each space and passed the double quotes as literal characters.
COMMAND=(vtools associate variant disease
         --discard_variants '%(NA)>0.1'
         --method 'BurdenBt --name BurdenTest --alternative 2'
         --group_by refgene.name2 -j 8 -v 2 -mpi)
NUMBER_OF_PROCESSES_PER_NODE=8

# PBS_NODEFILE has the name of all the nodes running the job; adjacent
# repeated lines are collapsed by uniq.
NODE_LIST=$(uniq "$PBS_NODEFILE")
nodes=($NODE_LIST)

# The main program runs on the node named $HOSTNAME; every other node is
# stored once in WORKER_NODES and written to hostlist.txt.
WORKER_NODES=()
for node in "${nodes[@]}"; do
    if [ "$node" != "$HOSTNAME" ]; then
        if [[ ! " ${WORKER_NODES[*]} " =~ " ${node} " ]]; then
            WORKER_NODES+=("$node")
        fi
    fi
done

# NOTE(review): this prints one address per matching ifconfig line, so
# ZEROMQIP holds several lines on a host with more than one configured
# interface -- confirm that a single address is produced on the cluster.
export ZEROMQIP=$(ifconfig | sed -En 's/127.0.0.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p')
export PROJECTFOLDER
HOSTFILE="$PROJECTFOLDER/hostlist.txt"

# Quoted on purpose: an unquoted path containing spaces would make rm
# delete each word of the path as a separate target.
rm -f -- "$HOSTFILE"
for node in "${WORKER_NODES[@]}"; do
    echo "$node" >> "$HOSTFILE"
done

# Get the path for mpiexec and stop early when it is not available.
MPIEXECPATH=$(command -v mpiexec)
if [ -z "$MPIEXECPATH" ]; then
    echo "mpiexec was not found in PATH" >&2
    exit 1
fi

# The IP address of the main node ($ZEROMQIP) and $PROJECTFOLDER are exported
# to the worker processes, which need them to communicate with the main node.
"$MPIEXECPATH" -d -x PATH -H "$HOSTNAME" -np 1 -wdir "$PROJECTFOLDER" "${COMMAND[@]}" : \
    -x ZEROMQIP -x PROJECTFOLDER -x PATH -hostfile "$HOSTFILE" -npernode "$NUMBER_OF_PROCESSES_PER_NODE" worker_run
38 |
--------------------------------------------------------------------------------
/test/ann/testNSFP.DB.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/ann/testNSFP.DB.gz
--------------------------------------------------------------------------------
/test/ann/testNSFP.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/ann/testNSFP.zip
--------------------------------------------------------------------------------
/test/ann/testThousandGenomes.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/ann/testThousandGenomes.zip
--------------------------------------------------------------------------------
/test/fmt/basic_hg18.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. See <http://www.gnu.org/licenses/>.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 |
7 | [format description]
8 | description=A basic variant input format with four columns: chr, pos, ref, alt.
9 | variant=chr,%(pos)s,%(ref)s,%(alt)s
10 |
11 | [DEFAULT]
12 | pos=pos
13 | pos_comment=Field for position. To export indel, set it to 'pos-length(upstream)'
14 |
15 | ref=ref
16 | ref_comment=Field for reference allele.
17 |
18 | alt=alt
19 | alt_comment=Field for alternative allele.
20 |
21 | [chr]
22 | index=1
23 | type=VARCHAR(20)
24 | adj=RemoveLeading('chr')
25 | comment=Chromosome
26 |
27 | [pos]
28 | index=2
29 | type=INTEGER NOT NULL
30 | comment=1-based position, hg18
31 |
32 | [ref]
33 | index=4
34 | type=VARCHAR(255)
35 | comment=Reference allele, '-' for insertion.
36 |
37 | [alt]
38 | index=5
39 | type=VARCHAR(255)
40 | comment=Alternative allele, '-' for deletion.
41 |
--------------------------------------------------------------------------------
/test/fmt/dbSNP_hg19validation.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. See <http://www.gnu.org/licenses/>.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 |
7 | [format description]
8 | description=Validation information from dbSNP. The input columns are: chr, pos. If the primary reference genome of the project is hg18, the project must have hg19 as an alternative reference genome.
9 | position=chr, %(pos)s
10 | variant_info=%(var_info)s
11 |
12 | [DEFAULT]
13 | pos=alt_pos
14 | pos_comment=Field for position. To export indel, set it to 'pos-length(upstream)'
15 |
16 | var_info=mut_type_dbSNP, validation
17 | var_info_comment=Variant information fields to be imported.
18 |
19 | [chr]
20 | index=1
21 | type=VARCHAR(20)
22 | adj=RemoveLeading('chr')
23 | comment=Chromosome
24 |
25 | [alt_pos]
26 | index=2
27 | type=INTEGER NOT NULL
28 | comment=1-based position, hg19
29 |
30 | [mut_type_dbSNP]
31 | index=3
32 | type=VARCHAR(255)
33 | comment=Functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.)
34 |
35 | [validation]
36 | index=4
37 | type=VARCHAR(255)
38 | comment=validation status, can be unknown, by-cluster, by-frequency, by-submitter, by-2hit-2allele, by-hapmap, and by-1000genomes
39 |
--------------------------------------------------------------------------------
/test/fmt/genotypes.fmt:
--------------------------------------------------------------------------------
1 | [format description]
2 | description=Input format for variants with multiple sample genotypes. The input file genotypes.txt is prepared by transposing and combining fields from some Hapmap data file in PED/MAP format.
3 | variant=chr,%(pos)s,%(ref)s,%(alt)s
4 | genotype=%(geno)s
5 | variant_info=%(var_info)s
6 | genotype_info=%(geno_info)s
7 |
8 | [DEFAULT]
9 | pos=pos
10 | pos_comment=Field for position. To export indel, set it to 'pos-length(upstream)'
11 |
12 | ref=ref
13 | ref_comment=Field for reference allele.
14 |
15 | alt=alt
16 | alt_comment=Field for alternative allele.
17 |
18 | geno=GT
19 | geno_comment=Field to extract genotype from .vcf file. You can set it to
20 | safe_GT if genotype is not the first field in the genotype columns of your .vcf file.
21 |
22 | var_info=snp_id, genet_dist
23 | var_info_comment=Variant information fields to be imported.
24 |
25 | geno_info=
26 | geno_info_comment=Optional genotype info
27 |
28 | [chr]
29 | index=1
30 | type=VARCHAR(20)
31 | adj=RemoveLeading('chr')
32 | comment=Chromosome
33 |
34 | [snp_id]
35 | index=2
36 | type=VARCHAR(255)
37 | comment=rs number or snp identifier
38 |
39 | [genet_dist]
40 | index=3
41 | type=INTEGER
42 | comment=Genetic distance (morgans)
43 |
44 | [pos]
45 | index=4
46 | type=INTEGER NOT NULL
47 | comment=1-based Position of the snp
48 |
49 | [ref]
50 | index=5
51 | type=VARCHAR(255)
52 | comment=Reference allele
53 |
54 | [alt]
55 | index=6
56 | type=VARCHAR(255)
57 | comment=Alternative allele
58 |
59 | [GT]
60 | index=7:
61 | type=INTEGER
62 | adj=Nullify(['.', '0'])
63 | comment=Genotype coded as 1 (ref alt) and 2 (alt alt)
64 |
--------------------------------------------------------------------------------
/test/fmt/randcol.fmt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Bo Peng (bpeng@mdanderson.org)
2 | # Distributed under GPL. See <http://www.gnu.org/licenses/>.
3 | #
4 | # Please refer to http://varianttools.sourceforge.net/Format/New for
5 | # a description of the format of this file.
6 |
7 | [format description]
8 | description=A basic variant input format with four columns: chr, pos, ref, alt.
9 | variant=chr,pos,ref,alt
10 | variant_info=%(var_info)s
11 |
12 | [DEFAULT]
13 | chr_col=1
14 | chr_col_comment=Column index for the chromosome field
15 |
16 | pos_col=2
17 | pos_col_comment=Column index for the position field
18 |
19 | ref_col=3
20 | ref_col_comment=Column index for the reference field
21 |
22 | alt_col=4
23 | alt_col_comment=Column index for the alternative field
24 |
25 | pos_adj=0
26 | pos_adj_comment=Set to 1 if the input position is zero-based.
27 |
28 | var_info=
29 |
30 |
31 | [chr]
32 | index=%(chr_col)s
33 | type=VARCHAR(20)
34 | adj=RemoveLeading('chr')
35 | comment=Chromosome
36 |
37 | [pos]
38 | index=%(pos_col)s
39 | adj=IncreaseBy(%(pos_adj)s)
40 | type=INTEGER NOT NULL
41 | comment=1-based position
42 |
43 | [ref]
44 | index=%(ref_col)s
45 | type=VARCHAR(255)
46 | comment=Reference allele, '-' for insertion.
47 |
48 | [alt]
49 | index=%(alt_col)s
50 | type=VARCHAR(255)
51 | comment=Alternative allele, '-' for deletion.
52 |
53 | [grpby]
54 | index=5
55 | type=VARCHAR(255)
56 | comment=group names
57 |
--------------------------------------------------------------------------------
/test/output/assogrp1.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 2
3 | 1 742456 T G 0
4 | 1 742584 A G 1
5 | 1 743268 C A 2
6 | 1 743288 T C 2
7 | 1 743404 G A 1
8 | 1 743712 G T 0
9 | 1 744074 G A 0
10 | 1 744197 T C 1
11 | 1 744366 G A 2
12 | 22 49522492 A G 0
13 | 22 49522870 G C 1
14 | 22 49523030 T C 1
15 | 22 49524123 C T 2
16 | 22 49524956 G A 2
17 | 22 49525473 G C 0
18 | 22 49525866 C A 2
19 | 22 49529883 C T 2
20 | 22 49532714 G A 2
21 | 22 49533094 C T 2
22 | 22 49533142 T C 2
23 | 22 49534358 A G 1
24 | 22 49534570 T C 2
25 | 22 49534642 T C 2
26 | 22 49534747 G C 2
27 | 22 49534764 T C 1
28 | 22 49534781 C T 1
29 |
--------------------------------------------------------------------------------
/test/output/assogrp2.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 4
3 | 1 742456 T G 0
4 | 1 742584 A G 3
5 | 1 743268 C A 4
6 | 1 743288 T C 0
7 | 1 743404 G A 1
8 | 1 743712 G T 1
9 | 1 744074 G A 2
10 | 1 744197 T C 4
11 | 1 744366 G A 3
12 | 22 49522492 A G 3
13 | 22 49522870 G C 0
14 | 22 49523030 T C 2
15 | 22 49524123 C T 2
16 | 22 49524956 G A 2
17 | 22 49525473 G C 4
18 | 22 49525866 C A 3
19 | 22 49529883 C T 3
20 | 22 49532714 G A 2
21 | 22 49533094 C T 3
22 | 22 49533142 T C 1
23 | 22 49534358 A G 1
24 | 22 49534570 T C 1
25 | 22 49534642 T C 1
26 | 22 49534747 G C 4
27 | 22 49534764 T C 3
28 | 22 49534781 C T 4
29 |
--------------------------------------------------------------------------------
/test/output/assogrp3.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 7
3 | 1 742456 T G 0
4 | 1 742584 A G 6
5 | 1 743268 C A 5
6 | 1 743288 T C 2
7 | 1 743404 G A 5
8 | 1 743712 G T 8
9 | 1 744074 G A 7
10 | 1 744197 T C 3
11 | 1 744366 G A 4
12 | 22 49522492 A G 0
13 | 22 49522870 G C 4
14 | 22 49523030 T C 8
15 | 22 49524123 C T 5
16 | 22 49524956 G A 3
17 | 22 49525473 G C 5
18 | 22 49525866 C A 7
19 | 22 49529883 C T 1
20 | 22 49532714 G A 3
21 | 22 49533094 C T 5
22 | 22 49533142 T C 0
23 | 22 49534358 A G 0
24 | 22 49534570 T C 2
25 | 22 49534642 T C 5
26 | 22 49534747 G C 4
27 | 22 49534764 T C 8
28 | 22 49534781 C T 2
29 |
--------------------------------------------------------------------------------
/test/output/assogrp4.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 0
3 | 1 742456 T G 4
4 | 1 742584 A G 8
5 | 1 743268 C A 0
6 | 1 743288 T C 10
7 | 1 743404 G A 5
8 | 1 743712 G T 5
9 | 1 744074 G A 11
10 | 1 744197 T C 2
11 | 1 744366 G A 2
12 | 22 49522492 A G 8
13 | 22 49522870 G C 4
14 | 22 49523030 T C 1
15 | 22 49524123 C T 2
16 | 22 49524956 G A 10
17 | 22 49525473 G C 10
18 | 22 49525866 C A 1
19 | 22 49529883 C T 3
20 | 22 49532714 G A 11
21 | 22 49533094 C T 10
22 | 22 49533142 T C 10
23 | 22 49534358 A G 6
24 | 22 49534570 T C 0
25 | 22 49534642 T C 10
26 | 22 49534747 G C 4
27 | 22 49534764 T C 9
28 | 22 49534781 C T 3
29 |
--------------------------------------------------------------------------------
/test/output/assogrp5.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 6
3 | 1 742456 T G 11
4 | 1 742584 A G 7
5 | 1 743268 C A 8
6 | 1 743288 T C 6
7 | 1 743404 G A 12
8 | 1 743712 G T 9
9 | 1 744074 G A 6
10 | 1 744197 T C 12
11 | 1 744366 G A 7
12 | 22 49522492 A G 14
13 | 22 49522870 G C 1
14 | 22 49523030 T C 6
15 | 22 49524123 C T 9
16 | 22 49524956 G A 12
17 | 22 49525473 G C 13
18 | 22 49525866 C A 14
19 | 22 49529883 C T 12
20 | 22 49532714 G A 6
21 | 22 49533094 C T 12
22 | 22 49533142 T C 5
23 | 22 49534358 A G 8
24 | 22 49534570 T C 5
25 | 22 49534642 T C 2
26 | 22 49534747 G C 14
27 | 22 49534764 T C 5
28 | 22 49534781 C T 6
29 |
--------------------------------------------------------------------------------
/test/output/assogrp6.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 15
3 | 1 742456 T G 9
4 | 1 742584 A G 13
5 | 1 743268 C A 12
6 | 1 743288 T C 5
7 | 1 743404 G A 14
8 | 1 743712 G T 4
9 | 1 744074 G A 15
10 | 1 744197 T C 12
11 | 1 744366 G A 14
12 | 22 49522492 A G 14
13 | 22 49522870 G C 17
14 | 22 49523030 T C 2
15 | 22 49524123 C T 2
16 | 22 49524956 G A 0
17 | 22 49525473 G C 0
18 | 22 49525866 C A 9
19 | 22 49529883 C T 14
20 | 22 49532714 G A 5
21 | 22 49533094 C T 11
22 | 22 49533142 T C 7
23 | 22 49534358 A G 5
24 | 22 49534570 T C 13
25 | 22 49534642 T C 19
26 | 22 49534747 G C 14
27 | 22 49534764 T C 16
28 | 22 49534781 C T 2
29 |
--------------------------------------------------------------------------------
/test/output/assogrp7.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 1
3 | 1 742456 T G 3
4 | 1 742584 A G 2
5 | 1 743268 C A 22
6 | 1 743288 T C 5
7 | 1 743404 G A 15
8 | 1 743712 G T 1
9 | 1 744074 G A 21
10 | 1 744197 T C 15
11 | 1 744366 G A 21
12 | 22 49522492 A G 3
13 | 22 49522870 G C 2
14 | 22 49523030 T C 9
15 | 22 49524123 C T 24
16 | 22 49524956 G A 22
17 | 22 49525473 G C 21
18 | 22 49525866 C A 21
19 | 22 49529883 C T 0
20 | 22 49532714 G A 4
21 | 22 49533094 C T 13
22 | 22 49533142 T C 0
23 | 22 49534358 A G 4
24 | 22 49534570 T C 15
25 | 22 49534642 T C 7
26 | 22 49534747 G C 15
27 | 22 49534764 T C 3
28 | 22 49534781 C T 5
29 |
--------------------------------------------------------------------------------
/test/output/assogrp8.txt:
--------------------------------------------------------------------------------
1 | chr pos ref alt group
2 | 1 742429 G A 29
3 | 1 742456 T G 0
4 | 1 742584 A G 14
5 | 1 743268 C A 12
6 | 1 743288 T C 9
7 | 1 743404 G A 27
8 | 1 743712 G T 28
9 | 1 744074 G A 2
10 | 1 744197 T C 6
11 | 1 744366 G A 24
12 | 22 49522492 A G 0
13 | 22 49522870 G C 21
14 | 22 49523030 T C 1
15 | 22 49524123 C T 16
16 | 22 49524956 G A 29
17 | 22 49525473 G C 12
18 | 22 49525866 C A 23
19 | 22 49529883 C T 18
20 | 22 49532714 G A 4
21 | 22 49533094 C T 26
22 | 22 49533142 T C 22
23 | 22 49534358 A G 17
24 | 22 49534570 T C 19
25 | 22 49534642 T C 13
26 | 22 49534747 G C 22
27 | 22 49534764 T C 27
28 | 22 49534781 C T 4
29 |
--------------------------------------------------------------------------------
/test/output/assores1.txt:
--------------------------------------------------------------------------------
1 | 0 0.430306 0.46442 0.731729
2 | 1 0.0828568 0.381475 0.875376
3 | 2 -0.138939 0.274634 -1.09275
4 |
--------------------------------------------------------------------------------
/test/output/assores2.txt:
--------------------------------------------------------------------------------
1 | 0 0.0669885 0.70313 0.381154
2 | 1 0.285605 0.352182 0.930582
3 | 3 -0.00624929 0.967045 -0.0413205
4 | 4 -0.0335427 0.750033 -0.31864
5 |
--------------------------------------------------------------------------------
/test/output/assores3.txt:
--------------------------------------------------------------------------------
1 | 0 0.430306 0.46442 0.731729
2 | 1 -0.0609057 0.800118 -0.253228
3 | 2 0.152727 0.251324 1.14747
4 | 3 -0.259238 0.609967 -0.510201
5 | 4 -0.11057 0.361796 -0.912157
6 | 5 0.438295 0.334969 0.964387
7 | 8 -0.0218357 0.916581 -0.104755
8 |
--------------------------------------------------------------------------------
/test/output/assores4.txt:
--------------------------------------------------------------------------------
1 | 0 0.156852 0.705634 0.37778
2 | 2 0.0479459 0.908071 0.115486
3 | 3 0.0962765 0.423544 0.800452
4 | 4 -0.109615 0.362665 -0.910508
5 | 5 0.438295 0.334969 0.964387
6 | 9 -0.0218357 0.916581 -0.104755
7 |
--------------------------------------------------------------------------------
/test/output/assores5.txt:
--------------------------------------------------------------------------------
1 | 1 0.0313213 0.86489 0.170175
2 | 11 0.430306 0.46442 0.731729
3 | 12 0.00248295 0.98993 0.0126225
4 | 14 -0.255987 0.114477 -1.57908
5 | 5 0.0142086 0.939371 0.0760707
6 | 6 0.15144 0.280018 1.08057
7 | 7 0.662234 0.35682 0.921654
8 |
--------------------------------------------------------------------------------
/test/output/assores6.txt:
--------------------------------------------------------------------------------
1 | 12 -0.259238 0.609967 -0.510201
2 | 13 0.156852 0.705634 0.37778
3 | 14 -0.121077 0.344582 -0.945374
4 | 16 -0.0218357 0.916581 -0.104755
5 | 17 0.0313213 0.86489 0.170175
6 | 2 0.15144 0.280018 1.08057
7 | 9 0.430306 0.46442 0.731729
8 |
--------------------------------------------------------------------------------
/test/output/assores7.txt:
--------------------------------------------------------------------------------
1 | 0 -0.0609057 0.800118 -0.253228
2 | 15 -0.148412 0.285006 -1.06943
3 | 2 0.0313213 0.86489 0.170175
4 | 21 0.662234 0.35682 0.921654
5 | 3 0.0286987 0.88404 0.145869
6 | 5 0.15144 0.280018 1.08057
7 |
--------------------------------------------------------------------------------
/test/output/assores8.txt:
--------------------------------------------------------------------------------
1 | 0 0.430306 0.46442 0.731729
2 | 18 -0.0609057 0.800118 -0.253228
3 | 19 0.156852 0.705634 0.37778
4 | 21 0.0313213 0.86489 0.170175
5 | 22 -0.255987 0.114477 -1.57908
6 | 24 0.662234 0.35682 0.921654
7 | 27 0.0582982 0.758744 0.307173
8 | 4 0.15144 0.280018 1.08057
9 | 6 -0.259238 0.609967 -0.510201
10 |
--------------------------------------------------------------------------------
/test/output/assores_wss1.txt:
--------------------------------------------------------------------------------
1 | 0 0.745032 0.46442 0.731729
2 | 1 0.118456 0.814949 0.234077
3 | 2 -0.136058 0.791039 -0.264997
4 |
--------------------------------------------------------------------------------
/test/output/assores_wss2.txt:
--------------------------------------------------------------------------------
1 | 0 0.461022 0.522973 0.638882
2 | 1 0.683604 0.342004 0.950439
3 | 3 0.173439 0.766826 0.29657
4 | 4 -1.06808 0.139063 -1.47988
5 |
--------------------------------------------------------------------------------
/test/output/assores_wss3.txt:
--------------------------------------------------------------------------------
1 | 0 0.745032 0.46442 0.731729
2 | 1 -0.257819 0.800118 -0.253228
3 | 2 0.38392 0.705634 0.37778
4 | 3 -0.518217 0.609967 -0.510201
5 | 4 -0.166461 0.779178 -0.280429
6 | 5 0.979445 0.334969 0.964387
7 | 8 -0.164246 0.874862 -0.157506
8 |
--------------------------------------------------------------------------------
/test/output/assores_wss4.txt:
--------------------------------------------------------------------------------
1 | 0 0.38392 0.705634 0.37778
2 | 2 0.20908 0.771191 0.290857
3 | 3 -0.257819 0.800118 -0.253228
4 | 4 -0.233108 0.694741 -0.392486
5 | 5 0.979445 0.334969 0.964387
6 | 9 -0.164246 0.874862 -0.157506
7 |
--------------------------------------------------------------------------------
/test/output/assores_wss5.txt:
--------------------------------------------------------------------------------
1 | 1 0.173713 0.86489 0.170175
2 | 11 0.745032 0.46442 0.731729
3 | 12 0.0687264 0.907108 0.116701
4 | 14 -1.61089 0.114477 -1.57908
5 | 5 0.117689 0.871988 0.161155
6 | 6 0 1 0
7 | 7 0.936306 0.35682 0.921654
8 |
--------------------------------------------------------------------------------
/test/output/assores_wss6.txt:
--------------------------------------------------------------------------------
1 | 12 -0.518217 0.609967 -0.510201
2 | 13 0.38392 0.705634 0.37778
3 | 14 0.0161445 0.974908 0.0314572
4 | 16 -0.164246 0.874862 -0.157506
5 | 17 0.173713 0.86489 0.170175
6 | 2 0 1 0
7 | 9 0.745032 0.46442 0.731729
8 |
--------------------------------------------------------------------------------
/test/output/assores_wss7.txt:
--------------------------------------------------------------------------------
1 | 0 -0.257819 0.800118 -0.253228
2 | 15 -0.190776 0.709542 -0.372525
3 | 2 0.173713 0.86489 0.170175
4 | 21 0.936306 0.35682 0.921654
5 | 3 0.302467 0.6787 0.414299
6 | 5 0 1 0
7 |
--------------------------------------------------------------------------------
/test/output/assores_wss8.txt:
--------------------------------------------------------------------------------
1 | 0 0.745032 0.46442 0.731729
2 | 18 -0.257819 0.800118 -0.253228
3 | 19 0.38392 0.705634 0.37778
4 | 21 0.173713 0.86489 0.170175
5 | 22 -1.61089 0.114477 -1.57908
6 | 24 0.936306 0.35682 0.921654
7 | 27 0.424429 0.560597 0.582052
8 | 4 0 1 0
9 | 6 -0.518217 0.609967 -0.510201
10 |
--------------------------------------------------------------------------------
/test/output/evsVariantTest.txt:
--------------------------------------------------------------------------------
1 | id ref alt DP MQ ANNO SVM
2 | 1 G A 472 28 nonsynonymous:OR4F5:NM_001005484:exon1:c.G26A:p.G9D, -1.4352462
3 | 2 A G 602 28 nonsynonymous:OR4F5:NM_001005484:exon1:c.A44G:p.E15G, -1.2097349
4 | 3 A G 4094 31 synonymous:OR4F5:NM_001005484:exon1:c.A180G:p.S60S, -0.9014406
5 | 4 T G 525238 33 nonsynonymous:OR4F5:NM_001005484:exon1:c.T338G:p.F113C, -0.85266604
6 | 5 G C 48194 58 intronic:COL16A1 0.053230746
7 | 6 G T 48194 58 intronic:COL16A1 0.053230746
8 |
--------------------------------------------------------------------------------
/test/output/exclude_anno1.txt:
--------------------------------------------------------------------------------
1 | 761732
2 | 761752
3 | 761800
4 | 761811
5 | 762589
6 | 762592
7 |
--------------------------------------------------------------------------------
/test/output/exclude_sift.txt:
--------------------------------------------------------------------------------
1 | 619
2 |
--------------------------------------------------------------------------------
/test/output/genotype_variant_sample_output.txt:
--------------------------------------------------------------------------------
1 | 3 0 3
2 | 7 0 7
3 | 7 0 7
4 | 4 0 4
5 | 4 0 4
6 | 5 0 5
7 | 4 0 4
8 | 6 0 6
9 | 3 0 3
10 | 4 0 4
11 | 7 0 7
12 | 8 0 8
13 | 8 0 8
14 | 8 0 8
15 | 5 0 5
16 | 6 0 6
17 | 9 0 9
18 | 8 0 8
19 | 9 0 9
20 | 5 0 5
21 | 9 0 9
22 | 8 0 8
23 | 7 0 7
24 | 3 0 3
25 | 7 0 7
26 | 5 0 5
27 | 7 0 7
28 | 5 0 5
29 | 7 0 7
30 | 4 0 4
31 | 6 0 6
32 | 4 0 4
33 | 5 0 5
34 | 7 0 7
35 | 3 0 3
36 | 3 0 3
37 | 6 0 6
38 | 6 0 6
39 | 5 0 5
40 | 5 0 5
41 | 5 0 5
42 | 4 0 4
43 | 7 0 7
44 | 5 0 5
45 | 6 0 6
46 | 9 0 9
47 | 6 0 6
48 | 2 0 2
49 | 9 0 9
50 |
--------------------------------------------------------------------------------
/test/output/import_cga.txt:
--------------------------------------------------------------------------------
1 | 1 38907 C T
2 | 1 41981 A G
3 | 1 46670 A G
4 | 1 47108 G C
5 | 1 47292 T G
6 | 1 49272 G A
7 | 1 49291 C T
8 | 1 49342 G T
9 | 1 49363 C T
10 | 1 51476 T C
11 | 1 51673 T C
12 | 1 52238 T G
13 | 1 52727 C G
14 | 1 53206 G C
15 | 1 54043 C T
16 | 1 54586 T C
17 | 1 54676 C T
18 | 1 55164 C A
19 | 1 55381 G A
20 | 1 55394 T A
21 | 1 55545 C T
22 | 1 55550 A T
23 | 1 55816 G A
24 | 1 55850 C G
25 | 1 55926 T C
26 | 1 55976 T C
27 | 1 56485 C T
28 | 1 56638 C T
29 | 1 56799 T C
30 | 1 57246 C G
31 | 1 57376 C T
32 | 1 57952 A C
33 | 1 57999 G T
34 | 1 58211 A G
35 | 1 58349 A G
36 | 1 58812 C T
37 | 1 59051 A G
38 | 1 59276 C T
39 | 1 59498 T C
40 | 1 60273 T C
41 | 1 60408 C T
42 | 1 60726 C A
43 | 1 60791 A G
44 | 1 61290 - G
45 | 1 61442 A G
46 | 1 61480 G C
47 | 1 61499 G A
48 | 1 62180 T G
49 | 1 62190 A C
50 | 1 62203 T C
51 | 1 62298 - CTTC
52 | 1 62578 G A
53 | 1 63074 A C
54 | 1 63792 G T
55 | 1 64125 C T
56 | 1 64976 C T
57 | 1 66008 C G
58 | 1 66131 C G
59 | 1 67184 G A
60 | 1 67223 C A
61 | 1 67242 A C
62 | 1 67605 T C
63 | 1 68306 C T
64 | 1 68316 T C
65 | 1 69511 A G
66 | 1 69552 G C
67 | 1 69569 T C
68 | 1 72787 C T
69 | 1 76846 T A
70 | 1 77110 A G
71 | 1 78035 G A
72 | 1 79078 G C
73 | 1 79202 G A
74 | 1 80141 A G
75 | 1 80443 AACAA -
76 | 1 81100 T G
77 | 1 81204 T C
78 | 1 81374 T C
79 | 1 81437 G A
80 | 1 82734 T C
81 | 1 85150 G A
82 | 1 87683 T C
83 | 1 87702 G A
84 | 1 87805 T C
85 | 1 87959 T C
86 | 1 88169 C T
87 | 1 88172 G A
88 | 1 88265 C T
89 | 1 88295 T A
90 | 1 88463 A G
91 | 1 91581 G A
92 | 1 92638 A T
93 | 1 92654 T C
94 | 1 107332 T C
95 | 1 243851 AAGT -
96 |
--------------------------------------------------------------------------------
/test/output/import_cga_phenotype.txt:
--------------------------------------------------------------------------------
1 | sample_name filename num_genotypes sample_genotype_fields
2 | samp_csv txt/CGA.tsv.bz2 95 GT,allele1VarScoreVAF,allele2VarScoreVAF,allele1VarScoreEAF,allele2VarScoreEAF
3 |
--------------------------------------------------------------------------------
/test/output/import_genotype_1.txt:
--------------------------------------------------------------------------------
1 | 1 rs2843403 0 2518957 C T
2 | 1 rs4648462 0 3155127 A C
3 | 1 rs7410846 0 3926588 G A
4 | 1 rs1490413 0 4267183 G A
5 | 1 rs1878052 0 4452662 G A
6 | 1 rs2071999 0 4673126 A C
7 | 1 rs10915297 0 4910002 T C
8 | 1 rs521430 0 5206936 C T
9 | 1 rs1935759 0 5526603 T A
10 | 1 rs548726 0 5836208 C T
11 | 1 rs6680884 0 6190958 G A
12 | 1 rs277686 0 6670484 C G
13 | 1 rs2071917 0 7073114 G A
14 | 1 rs1750838 0 7441851 A G
15 | 1 rs228688 0 7801717 G T
16 |
--------------------------------------------------------------------------------
/test/output/import_genotype_2.txt:
--------------------------------------------------------------------------------
1 | sample_name filename num_genotypes sample_genotype_fields
2 | V1 vcf/V1.vcf 989 GT
3 | DUP vcf/dup_geno.vcf 989 GT
4 |
--------------------------------------------------------------------------------
/test/output/import_mpi_multi_genotypes.txt:
--------------------------------------------------------------------------------
1 | sample_name filename num_genotypes sample_genotype_fields
2 | SAMP1 vcf/V1.vcf 989 GT
3 | SAMP1 vcf/V2.vcf 990 GT
4 | SAMP1 vcf/V3.vcf 988 GT
5 |
--------------------------------------------------------------------------------
/test/output/import_mpi_multi_samples.txt:
--------------------------------------------------------------------------------
1 | sample_name filename
2 | SAMP1 vcf/V1.vcf
3 | SAMP1 vcf/V2.vcf
4 | SAMP1 vcf/V3.vcf
5 |
--------------------------------------------------------------------------------
/test/output/import_mpi_multi_variant.txt:
--------------------------------------------------------------------------------
1 | Name: variant
2 | Description: Master variant table
3 | Creation date: Jan14
4 | Command:
5 | Fields: variant_id, bin, chr, pos, ref, alt
6 | Number of variants: 1611
7 |
--------------------------------------------------------------------------------
/test/output/import_mpi_samples.txt:
--------------------------------------------------------------------------------
1 | sample_name filename
2 | NA06985 vcf/CEU.vcf.gz
3 | NA06986 vcf/CEU.vcf.gz
4 | NA06994 vcf/CEU.vcf.gz
5 | NA07000 vcf/CEU.vcf.gz
6 | NA07037 vcf/CEU.vcf.gz
7 | NA07051 vcf/CEU.vcf.gz
8 | NA07346 vcf/CEU.vcf.gz
9 | NA07347 vcf/CEU.vcf.gz
10 | NA07357 vcf/CEU.vcf.gz
11 | NA10847 vcf/CEU.vcf.gz
12 | NA10851 vcf/CEU.vcf.gz
13 | NA11829 vcf/CEU.vcf.gz
14 | NA11830 vcf/CEU.vcf.gz
15 | NA11831 vcf/CEU.vcf.gz
16 | NA11832 vcf/CEU.vcf.gz
17 | NA11840 vcf/CEU.vcf.gz
18 | NA11881 vcf/CEU.vcf.gz
19 | NA11894 vcf/CEU.vcf.gz
20 | NA11918 vcf/CEU.vcf.gz
21 | NA11919 vcf/CEU.vcf.gz
22 | NA11920 vcf/CEU.vcf.gz
23 | NA11931 vcf/CEU.vcf.gz
24 | NA11992 vcf/CEU.vcf.gz
25 | NA11993 vcf/CEU.vcf.gz
26 | NA11994 vcf/CEU.vcf.gz
27 | NA11995 vcf/CEU.vcf.gz
28 | NA12003 vcf/CEU.vcf.gz
29 | NA12004 vcf/CEU.vcf.gz
30 | NA12005 vcf/CEU.vcf.gz
31 | NA12006 vcf/CEU.vcf.gz
32 | NA12043 vcf/CEU.vcf.gz
33 | NA12044 vcf/CEU.vcf.gz
34 | NA12045 vcf/CEU.vcf.gz
35 | NA12144 vcf/CEU.vcf.gz
36 | NA12154 vcf/CEU.vcf.gz
37 | NA12155 vcf/CEU.vcf.gz
38 | NA12156 vcf/CEU.vcf.gz
39 | NA12234 vcf/CEU.vcf.gz
40 | NA12249 vcf/CEU.vcf.gz
41 | NA12287 vcf/CEU.vcf.gz
42 | NA12414 vcf/CEU.vcf.gz
43 | NA12489 vcf/CEU.vcf.gz
44 | NA12716 vcf/CEU.vcf.gz
45 | NA12717 vcf/CEU.vcf.gz
46 | NA12749 vcf/CEU.vcf.gz
47 | NA12750 vcf/CEU.vcf.gz
48 | NA12751 vcf/CEU.vcf.gz
49 | NA12760 vcf/CEU.vcf.gz
50 | NA12761 vcf/CEU.vcf.gz
51 | NA12762 vcf/CEU.vcf.gz
52 | NA12763 vcf/CEU.vcf.gz
53 | NA12776 vcf/CEU.vcf.gz
54 | NA12812 vcf/CEU.vcf.gz
55 | NA12813 vcf/CEU.vcf.gz
56 | NA12814 vcf/CEU.vcf.gz
57 | NA12815 vcf/CEU.vcf.gz
58 | NA12828 vcf/CEU.vcf.gz
59 | NA12872 vcf/CEU.vcf.gz
60 | NA12873 vcf/CEU.vcf.gz
61 | NA12874 vcf/CEU.vcf.gz
62 |
--------------------------------------------------------------------------------
/test/output/import_mpi_variant.txt:
--------------------------------------------------------------------------------
1 | Name: variant
2 | Description: Master variant table
3 | Creation date: Jan14
4 | Command:
5 | Fields: variant_id, bin, chr, pos, ref, alt
6 | Number of variants: 288
7 |
--------------------------------------------------------------------------------
/test/output/import_multi_sample2_samples.txt:
--------------------------------------------------------------------------------
1 | sample_name filename
2 | SMP1 txt/sample_1_chr22.txt
3 | SMP2 txt/sample_1_chr22.txt
4 | SMP3 txt/sample_1_chr22.txt
5 |
--------------------------------------------------------------------------------
/test/output/import_multi_sample2_samples_hdf5.txt:
--------------------------------------------------------------------------------
1 | sample_name filename
2 | SMP1 txt/sample_1_chr22.txt
3 | SMP2 txt/sample_1_chr22.txt
4 | SMP3 txt/sample_1_chr22.txt
5 |
--------------------------------------------------------------------------------
/test/output/import_multi_sample2_variant.txt:
--------------------------------------------------------------------------------
1 | Name: variant
2 | Description: Master variant table
3 | Creation date: Jan13
4 | Command:
5 | Fields: variant_id, bin, chr, pos, ref, alt
6 | Number of variants: 9
7 |
--------------------------------------------------------------------------------
/test/output/import_multi_sample_samples.txt:
--------------------------------------------------------------------------------
1 | sample_name filename
2 | SMP1 txt/sample_chr22.txt
3 | SMP2 txt/sample_chr22.txt
4 | SMP3 txt/sample_chr22.txt
5 |
--------------------------------------------------------------------------------
/test/output/import_multi_sample_samples_hdf5.txt:
--------------------------------------------------------------------------------
1 | sample_name filename
2 | SMP1 txt/sample_chr22.txt
3 | SMP2 txt/sample_chr22.txt
4 | SMP3 txt/sample_chr22.txt
5 |
--------------------------------------------------------------------------------
/test/output/import_multi_sample_variant.txt:
--------------------------------------------------------------------------------
1 | Name: variant
2 | Description: Master variant table
3 | Creation date: Jan13
4 | Command:
5 | Fields: variant_id, bin, chr, pos, ref, alt
6 | Number of variants: 6
7 |
--------------------------------------------------------------------------------
/test/output/import_vcf_ref.txt:
--------------------------------------------------------------------------------
1 | 1 10434 - C
2 | 1 54790 - T
3 | 1 81963 - AA
4 | 1 82134 - AAAAAAAAAAAAAA
5 | 1 83787 - A
6 | 1 83873 - AG
7 | 1 83932 - A
8 | 1 83936 - AAA
9 | 1 87277 - T
10 | 1 91552 - T
11 | 1 120987 - TA
12 | 1 120996 - TAT
13 | 1 121018 - TATC
14 | 1 121047 - TAATAT
15 | 1 121051 - ATC
16 | 1 121063 - AACA
17 | 1 121065 - TTG
18 | 1 121070 - C
19 | 1 121073 - TAC
20 | 1 121083 - TATCT
21 | 1 241160 - TTC
22 | 1 718787 - T
23 | 1 723805 - A
24 | 1 724138 - AATGG
25 | 1 724138 - AATGGAATGGAATGG
26 | 1 724189 - ATGGAATGGG
27 | 1 724499 - C
28 | 1 726945 - GAATG
29 | 1 746215 - AAC
30 | 1 749964 - AA
31 | 1 750063 - G
32 | 1 753842 - C
33 | 1 761958 - T
34 | 1 768117 - GTTTT
35 | 1 768118 - TT
36 | 1 768625 - A
37 | 1 770426 - TCCCTCTCCCTTGCCTCCCTCCCCATCCATCTGCCCATCCCTCCATCCACCTCTTCATCTCTCCTTTCCTCCC
38 | 1 773870 - A
39 | 1 774008 - AGC
40 | 1 774884 - GACACACACACCTAGACACACACACCTGGACACACACACGTA
41 | 1 775257 - AAAG
42 | 1 778303 - CT
43 | 1 778303 - CCT
44 | 1 779912 - T
45 | 1 782959 - G
46 | 1 784987 - T
47 | 1 787071 - G
48 | 1 790697 - AT
49 | 1 790697 - TA
50 | 1 791108 - A
51 | 1 791130 - A
52 | 1 791759 - A
53 | 1 795531 - G
54 | 1 795559 - TTTTTT
55 | 1 796473 - C
56 | 1 797131 - TAA
57 | 1 800617 - C
58 | 1 801996 - TGGTCCTCCCTCTGCACTCACATCCCTGACGTCCTCCCGAGCCCTCACA
59 | 1 802232 - GCCCTCACGTGGTCCTCCCCCTGCACTCACATCCCTGACGTCCTCCCGAGCCCTCACATGGTCCTCCCCCTGCACTCACATCCCTGACATCCTCCCGT
60 | 1 803936 - A
61 | 1 804256 - C
62 | 1 804328 - C
63 | 1 807303 - T
64 | 1 807303 - TT
65 | 1 813464 - C
66 | 1 814375 - TTG
67 | 1 817761 - TAT
68 | 1 818003 - C
69 | 1 818346 - T
70 | 1 818563 - A
71 | 1 819027 - T
72 | 1 819517 - T
73 | 1 819702 - GTCTATGT
74 |
--------------------------------------------------------------------------------
/test/output/missing_gen.tped:
--------------------------------------------------------------------------------
1 | 1 . . 69116 0 0 0 0 0 0 0 0
2 | 1 . . 69134 0 0 0 0 0 0 0 0
3 | 1 . . 69270 0 0 0 0 0 0 0 0
4 | 1 . . 69428 T T T T T T 0 0
5 |
--------------------------------------------------------------------------------
/test/output/remove_field_after.txt:
--------------------------------------------------------------------------------
1 | Name: variant
2 | Description: Master variant table
3 | Creation date: Jan18
4 | Command:
5 | Fields: variant_id, bin, chr, pos, ref, alt
6 | Number of variants: 2144
7 |
--------------------------------------------------------------------------------
/test/output/remove_field_before.txt:
--------------------------------------------------------------------------------
1 | Name: variant
2 | Description: Master variant table
3 | Creation date: Jan18
4 | Command:
5 | Fields: variant_id, bin, chr, pos, ref, alt, CEU_cases_num,
6 | DP, gene_name
7 | Number of variants: 2144
8 |
--------------------------------------------------------------------------------
/test/output/update_sum_stat.txt:
--------------------------------------------------------------------------------
1 | . . .
2 | . . .
3 | . . .
4 | 100 15 69.3333
5 | 6 3 4.0000
6 | 4 3 3.3333
7 |
--------------------------------------------------------------------------------
/test/output/use_field.txt:
--------------------------------------------------------------------------------
1 | 9468354 - A 1
2 |
--------------------------------------------------------------------------------
/test/output/use_position.txt:
--------------------------------------------------------------------------------
1 | 9468354 - A 1
2 |
--------------------------------------------------------------------------------
/test/output/vcf_assigned_sample_name_genotype.txt:
--------------------------------------------------------------------------------
1 | sample_name filename num_genotypes sample_genotype_fields
2 | samp_vcf1 vcf/SAMP1.vcf 289 GT
3 | samp_vcf2 vcf/SAMP2.vcf 288 GT
4 | samp_vcf3 vcf/SAMP...x_variants.vcf 135
5 |
--------------------------------------------------------------------------------
/test/output/vcf_single_sampleName_genotype.txt:
--------------------------------------------------------------------------------
1 | sample_name filename num_genotypes sample_genotype_fields
2 | SAMP1 vcf/SAMP1.vcf 289 GT
3 | SAMP2 vcf/SAMP2.vcf 288 GT
4 |
--------------------------------------------------------------------------------
/test/phenotype/phenotype.txt:
--------------------------------------------------------------------------------
1 | sample_name aff sex BMI
2 | NA06985 2 F 19.64
3 | NA06986 1 M None
4 | NA06994 1 F 19.49
5 | NA07000 2 F 21.52
6 | NA07037 2 F 23.05
7 | NA07051 1 F 21.01
8 | NA07346 1 F 18.93
9 | NA07347 2 M 19.2
10 | NA07357 2 M 20.61
11 | NA10847 2 M 14.6
12 | NA10851 2 M 22.28
13 | NA11829 1 M 20.58
14 | NA11830 1 F 16.64
15 | NA11831 1 F 18.79
16 | NA11832 2 F 19.74
17 | NA11840 2 M 18.82
18 | NA11881 2 F 17.79
19 | NA11894 2 F 19.22
20 | NA11918 2 F 25.56
21 | NA11919 1 F 22.28
22 | NA11920 2 F 23.18
23 | NA11931 2 F 16.15
24 | NA11992 2 F 20.41
25 | NA11993 2 F 17.09
26 | NA11994 2 M 23.74
27 | NA11995 1 F 17.15
28 | NA12003 2 M 24.13
29 | NA12004 2 F 20.31
30 | NA12005 2 M 17.73
31 | NA12006 2 M 19.99
32 | NA12043 2 F 21.91
33 | NA12044 1 M 21.07
34 | NA12045 1 F 23.73
35 | NA12144 1 F 21.15
36 | NA12154 2 M 22.35
37 | NA12155 1 F 21.39
38 | NA12156 1 F 21.67
39 | NA12234 2 F 15.31
40 | NA12249 2 M 21.36
41 | NA12287 2 F 24.58
42 | NA12414 1 F 20.67
43 | NA12489 1 F 20.29
44 | NA12716 1 F 16.92
45 | NA12717 1 F 20.38
46 | NA12749 1 F 21.25
47 | NA12750 1 F 17.7
48 | NA12751 1 F 25.97
49 | NA12760 2 F 23.35
50 | NA12761 2 M 20.71
51 | NA12762 2 M 20.3
52 | NA12763 1 M 19.6
53 | NA12776 2 M 21.01
54 | NA12812 2 F 18.22
55 | NA12813 2 M 18.16
56 | NA12814 1 F 24.41
57 | NA12815 2 M 17.33
58 | NA12828 1 M 18.69
59 | NA12872 2 M 21.15
60 | NA12873 2 F 20.32
61 | NA12874 1 M 19.41
62 | SAMP1 1 M 22.78
63 | SAMP2 2 F 24.43
64 |
--------------------------------------------------------------------------------
/test/plink/dat1.bed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/plink/dat1.bed
--------------------------------------------------------------------------------
/test/plink/dat1.fam:
--------------------------------------------------------------------------------
1 | 1 1 0 0 0 -9
2 | 2 2 0 0 0 -9
3 | 3 3 0 0 0 -9
4 | 4 4 0 0 0 -9
5 | 5 5 0 0 0 -9
6 | 6 6 0 0 0 -9
7 | 7 7 0 0 0 -9
8 | 8 8 0 0 0 -9
9 | 9 9 0 0 0 -9
10 | 10 10 0 0 0 -9
11 | 11 11 0 0 0 -9
12 | 12 12 0 0 0 -9
13 | 13 13 0 0 0 -9
14 | 14 14 0 0 0 -9
15 | 15 15 0 0 0 -9
16 | 16 16 0 0 0 -9
17 | 17 17 0 0 0 -9
18 | 18 18 0 0 0 -9
19 | 19 19 0 0 0 -9
20 | 20 20 0 0 0 -9
21 | 21 21 0 0 0 -9
22 | 22 22 0 0 0 -9
23 | 23 23 0 0 0 -9
24 | 24 24 0 0 0 -9
25 | 25 25 0 0 0 -9
26 | 26 26 0 0 0 -9
27 | 27 27 0 0 0 -9
28 | 28 28 0 0 0 -9
29 | 29 29 0 0 0 -9
30 | 30 30 0 0 0 -9
31 | 31 31 0 0 0 -9
32 | 32 32 0 0 0 -9
33 | 33 33 0 0 0 -9
34 | 34 34 0 0 0 -9
35 | 35 35 0 0 0 -9
36 | 36 36 0 0 0 -9
37 | 37 37 0 0 0 -9
38 | 38 38 0 0 0 -9
39 | 39 39 0 0 0 -9
40 | 40 40 0 0 0 -9
41 | 41 41 0 0 0 -9
42 | 42 42 0 0 0 -9
43 | 43 43 0 0 0 -9
44 | 44 44 0 0 0 -9
45 | 45 45 0 0 0 -9
46 | 46 46 0 0 0 -9
47 | 47 47 0 0 0 -9
48 | 48 48 0 0 0 -9
49 | 49 49 0 0 0 -9
50 | 50 50 0 0 0 -9
51 | 51 51 0 0 0 -9
52 | 52 52 0 0 0 -9
53 | 53 53 0 0 0 -9
54 | 54 54 0 0 0 -9
55 | 55 55 0 0 0 -9
56 | 56 56 0 0 0 -9
57 | 57 57 0 0 0 -9
58 | 58 58 0 0 0 -9
59 | 59 59 0 0 0 -9
60 | 60 60 0 0 0 -9
61 | 61 61 0 0 0 -9
62 | 62 62 0 0 0 -9
63 | 63 63 0 0 0 -9
64 | 64 64 0 0 0 -9
65 | 65 65 0 0 0 -9
66 | 66 66 0 0 0 -9
67 | 67 67 0 0 0 -9
68 | 68 68 0 0 0 -9
69 | 69 69 0 0 0 -9
70 | 70 70 0 0 0 -9
71 | 71 71 0 0 0 -9
72 | 72 72 0 0 0 -9
73 | 73 73 0 0 0 -9
74 | 74 74 0 0 0 -9
75 | 75 75 0 0 0 -9
76 | 76 76 0 0 0 -9
77 | 77 77 0 0 0 -9
78 | 78 78 0 0 0 -9
79 | 79 79 0 0 0 -9
80 | 80 80 0 0 0 -9
81 | 81 81 0 0 0 -9
82 | 82 82 0 0 0 -9
83 | 83 83 0 0 0 -9
84 | 84 84 0 0 0 -9
85 | 85 85 0 0 0 -9
86 | 86 86 0 0 0 -9
87 | 87 87 0 0 0 -9
88 | 88 88 0 0 0 -9
89 | 89 89 0 0 0 -9
90 | 90 90 0 0 0 -9
91 | 91 91 0 0 0 -9
92 | 92 92 0 0 0 -9
93 | 93 93 0 0 0 -9
94 | 94 94 0 0 0 -9
95 | 95 95 0 0 0 -9
96 | 96 96 0 0 0 -9
97 | 97 97 0 0 0 -9
98 | 98 98 0 0 0 -9
99 | 99 99 0 0 0 -9
100 | 100 100 0 0 0 -9
101 |
--------------------------------------------------------------------------------
/test/proj/assoproj.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/proj/assoproj.tar.gz
--------------------------------------------------------------------------------
/test/run_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # $File: ProcessTestCase $
4 | # $LastChangedDate: 2011-06-16 20:10:41 -0500 (Thu, 16 Jun 2011) $
5 | # $Rev: 4234 $
6 | #
7 | # This file is part of variant_tools, a software application to annotate,
8 | # summarize, and filter variants for next-gen sequencing analysis.
9 | # Please visit http://varianttools.sourceforge.net for details.
10 | #
11 | # Copyright (C) 2011 - 2013 Bo Peng (bpeng@mdanderson.org)
12 | #
13 | # This program is free software: you can redistribute it and/or modify
14 | # it under the terms of the GNU General Public License as published by
15 | # the Free Software Foundation, either version 3 of the License, or
16 | # (at your option) any later version.
17 | #
18 | # This program is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 | # GNU General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU General Public License
24 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 | #
26 |
27 | import os
28 | import re
29 | import unittest
30 | import sys
31 |
32 | def importTests():
33 | tests = unittest.TestSuite()
34 | for file in os.listdir('.'):
35 | match = re.match("^(test_(.*))\\.py$", file)
36 | if match:
37 | m = match.group(1)
38 | print("Adding test cases in %s" % m)
39 | module = __import__(m)
40 | tests.addTest(unittest.defaultTestLoader.loadTestsFromModule( module ))
41 | return tests
42 |
if __name__ == '__main__':
    # Run every discovered test case with verbose, per-test output.
    runner = unittest.TextTestRunner(verbosity=2)
    outcome = runner.run(importTests())
    # Signal failure to the calling process (e.g. a CI job): passing a string
    # to sys.exit() prints it to stderr and exits with a non-zero status.
    if outcome.errors or outcome.failures:
        sys.exit('test fail')
49 |
--------------------------------------------------------------------------------
/test/txt/ANNOVAR.txt:
--------------------------------------------------------------------------------
1 | 1 161003087 161003087 C T comments: rs1000050, a SNP in Illumina SNP arrays
2 | 1 84647761 84647761 C T comments: rs6576700 or SNP_A-1780419, a SNP in Affymetrix SNP arrays
3 | 1 13133880 13133881 TC - comments: rs59770105, a 2-bp deletion
4 | 1 11326183 11326183 - AT comments: rs35561142, a 2-bp insertion
5 | 1 105293754 105293754 A ATAAA comments: rs10552169, a block substitution
6 | 1 67478546 67478546 G A comments: rs11209026 (R381Q), a SNP in IL23R associated with Crohn's disease
7 | 2 233848107 233848107 T C comments: rs2241880 (T300A), a SNP in the ATG16L1 associated with Crohn's disease
8 | 16 49303427 49303427 C T comments: rs2066844 (R702W), a non-synonymous SNP in NOD2
9 | 16 49314041 49314041 G C comments: rs2066845 (G908R), a non-synonymous SNP in NOD2
10 | 16 49321279 49321279 - C comments: rs2066847 (c.3016_3017insC), a frameshift SNP in NOD2
11 | 13 19661686 19661686 G - comments: rs1801002 (del35G), a frameshift mutation in GJB2, associated with hearing loss
12 | 13 19695176 20003944 0 - comments: a 342kb deletion encompassing GJB6, associated with hearing loss
13 |
--------------------------------------------------------------------------------
/test/txt/CASAVA18_SNP.txt:
--------------------------------------------------------------------------------
1 | # ** CASAVA depth-filtered snp calls **
2 | #$ CMDLINE /CASAVA-1.8.0a19/filterSmallVariants.pl --chrom=chr1
3 | #$ SEQ_MAX_DEPTH chr1 142.345267150165
4 | #
5 | #$ COLUMNS seq_name pos bcalls_used bcalls_filt ref Q(snp) max_gt Q(max_gt) max_gt|poly_site Q(max_gt|poly_site) A_used C_used G_used T_used
6 | chr1 10231 5 9 C 28 AC 28 AC 59 3 2 0 0
7 | chr1 10255 14 29 A 1 AA 9 AT 25 12 0 0 2
8 | chr1 10264 15 19 C 18 AC 18 AC 51 4 11 0 0
9 | chr1 10291 2 16 C 1 CC 10 CT 21 0 1 0 1
10 | chr1 10330 3 14 C 2 CC 5 AC 28 2 1 0 0
11 | chr1 13273 9 0 G 58 CG 54 CG 57 0 6 3 0
12 | chr1 14464 18 0 A 60 AT 60 AT 93 12 0 0 6
13 | chr1 14673 19 0 G 63 CG 63 CG 96 0 8 11 0
14 | chr1 14699 23 0 C 72 CG 72 CG 105 0 14 9 0
15 | chr1 14907 13 0 A 118 AG 65 AG 65 4 0 9 0
16 | chr1 14930 14 2 A 119 AG 68 AG 68 5 0 9 0
17 | chr1 14933 14 2 G 78 AG 78 AG 110 6 0 8 0
18 | chr1 14976 4 0 G 18 AG 18 AG 47 2 0 2 0
19 | chr1 15211 2 0 T 37 GG 5 GG 5 0 0 2 0
20 | chr1 15817 1 0 G 11 GT 3 GT 3 0 0 0 1
21 | chr1 15820 1 0 G 11 GT 3 GT 3 0 0 0 1
22 | chr1 16487 12 0 T 62 CT 62 CT 94 0 6 0 6
23 | chr1 17538 64 0 C 88 AC 88 AC 121 18 46 0 0
24 | chr1 17746 53 1 A 22 AG 22 AG 55 39 0 14 0
25 | chr1 17765 47 1 G 26 AG 26 AG 59 13 0 34 0
26 | chr1 20131 1 0 G 8 CG 2 CG 3 0 1 0 0
27 | chr1 20144 1 0 G 9 AG 2 AG 3 1 0 0 0
28 | chr1 20206 2 0 C 4 CT 4 CT 30 0 1 0 1
29 | chr1 20245 3 0 G 4 AG 4 AG 34 1 0 2 0
30 | chr1 20304 2 0 G 2 GG 5 CG 27 0 1 1 0
31 |
--------------------------------------------------------------------------------
/test/txt/CGA.tsv.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/txt/CGA.tsv.bz2
--------------------------------------------------------------------------------
/test/txt/assoc.fmt:
--------------------------------------------------------------------------------
1 | [format description]
2 | description=Input format for variants with multiple sample genotypes.
3 | delimiter=None
4 | variant=chr,%(pos)s,%(ref)s,%(alt)s
5 | genotype=%(geno)s
6 |
7 | [DEFAULT]
8 | pos=pos
9 | pos_comment=Field for position.
10 |
11 | ref=ref
12 | ref_comment=Field for reference allele.
13 |
14 | alt=alt
15 | alt_comment=Field for alternative allele.
16 |
17 | geno=GT
18 | geno_comment=Field to extract genotype.
19 |
20 | [chr]
21 | index=1
22 | type=VARCHAR(20)
23 | adj=RemoveLeading('chr')
24 | comment=Chromosome
25 |
26 | [pos]
27 | index=2
28 | type=INTEGER NOT NULL
29 | comment=1-based Position of the snp
30 |
31 | [ref]
32 | index=3
33 | type=VARCHAR(255)
34 | comment=Reference allele
35 |
36 | [alt]
37 | index=4
38 | type=VARCHAR(255)
39 | comment=Alternative allele
40 |
41 | [GT]
42 | index=5:
43 | type=INTEGER
44 | comment=Genotype w/ 0,1,2 codings
45 |
--------------------------------------------------------------------------------
/test/txt/complteGenomics.tsv.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/txt/complteGenomics.tsv.bz2
--------------------------------------------------------------------------------
/test/txt/invalid.tsv:
--------------------------------------------------------------------------------
1 | CHR POS START REF ALT
2 | 1 75927 86064 G C
3 | 1 75927 86064 G
4 | 1 76193 86330 A G
5 | 1 77052 87189 G A
6 | 1 78178 88315 G A
7 | 1 78200 88337 G A
8 | 1 81398 91535 G T
9 | 1 98172 108309 T C
10 | 1 223335 233472 C G
11 | 1 224622 234759 A T
12 | 1 225791 235928 G A
13 |
--------------------------------------------------------------------------------
/test/txt/pileup.indel:
--------------------------------------------------------------------------------
1 | chr10 57162 D1 G * homo 26 3 6
2 | chr10 62899 I4 AAAA * hete 31 17 33
3 | chr10 85429 I1 A * homo 38 29 32
4 | chr10 87126 I24 TGCATTTACGTGATCTTGGCTCAC * hete 51 10 38
5 | chr10 87668 D3 CTC * hete 52 27 34
6 | chr10 89301 D1 A * hete 37 7 45
7 | chr10 89448 I3 AGG * hete 27 4 31
8 | chr10 93681 I1 G * hete 21 12 111
9 | chr10 94117 I3 CAA * hete 29 29 81
10 | chr10 94848 D3 TTA * hete 54 7 48
11 | chr10 95775 I1 T * hete 44 6 44
12 | chr10 97572 D1 T * hete 44 8 55
13 | chr10 98719 I1 T * hete 48 13 39
14 | chr10 99022 I1 T * homo 52 19 36
15 | chr10 100224 D6 CCCTAA * hete 41 12 31
16 | chr10 100433 D6 ACCCTC * hete 50 2 20
17 | chr10 100799 I1 G * hete 50 4 22
18 | chr10 101382 D1 G * hete 54 12 39
19 | chr10 101729 D3 GTA * hete 51 19 58
20 | chr10 103093 D1 T * homo 57 23 33
21 | chr10 103731 D2 GA * hete 46 6 28
22 | chr10 106207 D9 TTGTTTTTG * hete 46 6 24
23 | chr10 106216 D4 TTTT * homo 49 11 19
24 | chr10 107344 I1 C * hete 54 6 32
25 | chr10 108119 I1 G * hete 31 7 19
26 | chr10 108176 I1 A * hete 46 3 22
27 | chr10 110565 D2 AA * hete 47 4 11
28 | chr10 110582 D2 AG * hete 51 2 13
29 | chr10 110806 D7 TTTTTTT * hete 55 5 14
30 | chr10 110829 I3 GGG * hete 45 2 13
31 |
--------------------------------------------------------------------------------
/test/txt/sample_1_chr22.txt:
--------------------------------------------------------------------------------
1 | #chr pos end ref all1 all2 dbsnp SMP1 LS SMP2 LS SMP3 LS
2 | 22 16123379 16123379 A A G rs74370004 AA 10 AG 6 AG 25
3 | 22 16123425 16123425 T T C rs79052403 TT 9 CT 2 TT 14
4 | 22 16123469 16123469 T T G GG 4 GG 7 GT 16
5 | 22 16123488 16123488 G G T GG 5 GG 12 GG 14
6 | 22 16123496 16123496 G G A GG 6 GG 14 GG 15
7 | 22 16123524 16123524 A A C AA 4 AA 2 AA 14
8 | 22 16123531 16123531 G G T rs62226612 GG 5 GG 11 GG 13
9 | 22 16123762 16123762 C C G CC 33 CC 48 CC 25
10 | 22 16123793 16123793 G G A AG 11 AG 8 AG 21
11 |
--------------------------------------------------------------------------------
/test/txt/sample_chr22.txt:
--------------------------------------------------------------------------------
1 | #chr pos end ref all1 all2 dbsnp SMP1 LS SMP2 LS SMP3 LS
2 | 22 16060526 16060526 T TCT - TT 3 T- 4 -- 4
3 | 22 16078617 16078618 TG TG - TT 0 T- 4 TT 0
4 | 22 16123379 16123379 A A G rs74370004 AA 10 AG 6 AG 25
5 | 22 16123409 16123410 - - G GG 10 GG 23 G- 14
6 | 22 16123425 16123425 T T C rs79052403 TT 9 CT 2 TT 14
7 | 22 16404838 16404839 - - GA GG 3 AG 5 GG 4
8 |
--------------------------------------------------------------------------------
/test/txt/variants.txt:
--------------------------------------------------------------------------------
1 | 1 203148112 T -
2 | 1 203148168 G A
3 | 1 203148202 G C
4 | 1 203148224 G A
5 | 1 203148265 GG T
6 | 1 203148284 T C
7 | 1 203148294 G T
8 | 1 203148359 C A
9 | 1 203148360 G A
10 | 1 203148360 G C
11 | 1 203148510 G T
12 | 1 203148513 A T
13 | 1 203148633 A G
14 | 1 203148677 T C
15 | 1 203148727 C T
16 | 1 203148868 T C
17 | 1 203148989 - C
18 | 10 58118181 A C
19 | 10 58118185 C T
20 | 10 58120990 C T
21 |
--------------------------------------------------------------------------------
/test/vcf/CEU.vcf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/CEU.vcf.gz
--------------------------------------------------------------------------------
/test/vcf/CEU.vcf.gz.tbi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/CEU.vcf.gz.tbi
--------------------------------------------------------------------------------
/test/vcf/CEU_dup.vcf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/CEU_dup.vcf.gz
--------------------------------------------------------------------------------
/test/vcf/EMPTY.vcf:
--------------------------------------------------------------------------------
1 | ##fileformat=VCFv4.0
2 | ##FILTER=
3 | ##FORMAT=
4 | ##FORMAT=
5 | ##FORMAT=
6 | ##FORMAT=
7 | ##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=human_g1k_v37.fasta rodBind=[/tmp/0.251173662095429.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub"
8 | ##INFO=
9 | ##INFO=
10 | ##INFO=
11 | ##INFO=
12 | ##INFO=
13 | ##INFO=
14 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP2
15 |
--------------------------------------------------------------------------------
/test/vcf/chromX.vcf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/chromX.vcf.gz
--------------------------------------------------------------------------------
/test/vcf/compare.vcf:
--------------------------------------------------------------------------------
1 | ##fileformat=VCFv4.0
2 | ##INFO=
3 | ##INFO=
4 | ##INFO=
5 | ##INFO=
6 | ##INFO=
7 | ##INFO=
8 | ##FILTER=
9 | ##FORMAT=
10 | ##FORMAT=
11 | ##FORMAT=
12 | ##FORMAT=
13 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP1 SAMP2
14 | 1 31705 rs3843004 A G 23 PASS DP=9;NS=1 GT 1/1 ./.
15 | 1 50195 rs62637816 T C,G 99 PASS DP=9;NS=1 GT 0/2 0/2
16 | 1 50589 rs2531295 C A 29 PASS DP=4;NS=1 GT ./. 0/1
17 |
--------------------------------------------------------------------------------
/test/vcf/hdf5_test.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vatlab/varianttools/a05c68b1f1ce23a85b6114796abc4d72fd916ce8/test/vcf/hdf5_test.h5
--------------------------------------------------------------------------------