├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake └── Modules │ ├── CheckAndSetStackProtectorStrong.cmake │ ├── CheckAndSetStdCXX2011Flag.cmake │ ├── FindCURL.cmake │ ├── FindGPerftools.cmake │ ├── FindIPP.cmake │ ├── FindOpenMPv4.cmake │ ├── FindProtobufWrapper.cmake │ ├── FindRapidJSON.cmake │ ├── FindTileDB.cmake │ ├── Findhtslib.cmake │ ├── Findlibcsv.cmake │ ├── Findlibdbi.cmake │ ├── Findlibuuid.cmake │ ├── Findsafestringlib.cmake │ └── libdbi_test_program.c ├── contrib └── README.md ├── docker ├── requirements.txt └── vcf_combiner │ ├── Dockerfile │ ├── etc │ └── profile.d │ │ └── cont-env.sh │ ├── root │ └── .bashrc │ └── usr │ ├── bin │ ├── combine_vcf.py │ ├── container-entrypoint │ └── container-usage │ └── share │ ├── cont-docs │ ├── README │ └── combine_vcf_help.txt │ ├── cont-entry │ └── cont-env.sh │ ├── cont-intel │ └── vid.json │ ├── cont-layer │ └── README │ ├── cont-lib │ ├── autoload │ │ ├── README │ │ └── genomicsdb-env.sh │ ├── cont-env.sh │ ├── cont-lib.sh │ └── parser-simple-config.sh │ └── cont-volume │ └── README ├── example ├── CMakeLists.txt ├── java │ ├── TestBufferStreamGenomicsDBImporter.java │ ├── TestGenomicsDB.java │ ├── TestGenomicsDBImporterWithMergedVCFHeader.java │ └── TestGenomicsDBSparkHDFS.java └── src │ ├── test_genomicsdb_bcf_generator.cc │ └── test_genomicsdb_importer.cc ├── pom.xml ├── requirements.txt ├── src ├── CMakeLists.txt ├── main │ ├── CMakeLists.txt │ ├── cpp │ │ ├── include │ │ │ ├── config │ │ │ │ ├── genomicsdb_config_base.h │ │ │ │ ├── json_config.h │ │ │ │ └── variant_query_config.h │ │ │ ├── genomicsdb │ │ │ │ ├── genomicsdb_columnar_field.h │ │ │ │ ├── genomicsdb_iterators.h │ │ │ │ ├── genomicsdb_jni_exception.h │ │ │ │ ├── genomicsdb_multid_vector_field.h │ │ │ │ ├── query_variants.h │ │ │ │ ├── variant.h │ │ │ │ ├── variant_array_schema.h │ │ │ │ ├── variant_cell.h │ │ │ │ ├── variant_field_data.h │ │ │ │ └── variant_storage_manager.h │ │ │ ├── loader 
│ │ │ │ ├── column_partition_batch.h │ │ │ │ ├── genomicsdb_importer.h │ │ │ │ ├── load_operators.h │ │ │ │ ├── tiledb_loader.h │ │ │ │ ├── tiledb_loader_file_base.h │ │ │ │ └── tiledb_loader_text_file.h │ │ │ ├── query_operations │ │ │ │ ├── broad_combined_gvcf.h │ │ │ │ └── variant_operations.h │ │ │ ├── utils │ │ │ │ ├── command_line.h │ │ │ │ ├── gt_common.h │ │ │ │ ├── headers.h │ │ │ │ ├── histogram.h │ │ │ │ ├── lut.h │ │ │ │ ├── memory_measure.h │ │ │ │ ├── profiling.h │ │ │ │ ├── timer.h │ │ │ │ ├── vid_mapper.h │ │ │ │ ├── vid_mapper_pb.h │ │ │ │ └── vid_mapper_sql.h │ │ │ └── vcf │ │ │ │ ├── genomicsdb_bcf_generator.h │ │ │ │ ├── known_field_info.h │ │ │ │ ├── vcf.h │ │ │ │ ├── vcf2binary.h │ │ │ │ └── vcf_adapter.h │ │ └── src │ │ │ ├── config │ │ │ ├── genomicsdb_config_base.cc │ │ │ ├── json_config.cc │ │ │ └── variant_query_config.cc │ │ │ ├── genomicsdb │ │ │ ├── genomicsdb_columnar_field.cc │ │ │ ├── genomicsdb_iterators.cc │ │ │ ├── genomicsdb_multid_vector_field.cc │ │ │ ├── query_variants.cc │ │ │ ├── variant.cc │ │ │ ├── variant_array_schema.cc │ │ │ ├── variant_cell.cc │ │ │ ├── variant_field_data.cc │ │ │ ├── variant_field_handler.cc │ │ │ └── variant_storage_manager.cc │ │ │ ├── loader │ │ │ ├── genomicsdb_importer.cc │ │ │ ├── load_operators.cc │ │ │ ├── tiledb_loader.cc │ │ │ ├── tiledb_loader_file_base.cc │ │ │ └── tiledb_loader_text_file.cc │ │ │ ├── query_operations │ │ │ ├── broad_combined_gvcf.cc │ │ │ └── variant_operations.cc │ │ │ ├── utils │ │ │ ├── command_line.cc │ │ │ ├── histogram.cc │ │ │ ├── known_field_info.cc │ │ │ ├── lut.cc │ │ │ ├── memory_measure.cc │ │ │ ├── tiledb_utils.cc │ │ │ ├── timer.cc │ │ │ ├── vid_mapper.cc │ │ │ ├── vid_mapper_pb.cc │ │ │ └── vid_mapper_sql.cc │ │ │ └── vcf │ │ │ ├── genomicsdb_bcf_generator.cc │ │ │ ├── vcf2binary.cc │ │ │ └── vcf_adapter.cc │ ├── java │ │ └── com │ │ │ └── intel │ │ │ └── genomicsdb │ │ │ ├── Constants.java │ │ │ ├── GenomicsDBLibLoader.java │ │ │ ├── GenomicsDBUtils.java 
│ │ │ ├── GenomicsDBUtilsJni.java │ │ │ ├── exception │ │ │ └── GenomicsDBException.java │ │ │ ├── importer │ │ │ ├── Constants.java │ │ │ ├── GenomicsDBImporter.java │ │ │ ├── GenomicsDBImporterJni.java │ │ │ ├── GenomicsDBImporterStreamWrapper.java │ │ │ ├── MultiChromosomeIterator.java │ │ │ ├── SilentByteBufferStream.java │ │ │ ├── extensions │ │ │ │ ├── CallSetMapExtensions.java │ │ │ │ ├── JsonFileExtensions.java │ │ │ │ └── VidMapExtensions.java │ │ │ └── model │ │ │ │ ├── ChromosomeInterval.java │ │ │ │ └── SampleInfo.java │ │ │ ├── model │ │ │ ├── BatchCompletionCallbackFunctionArgument.java │ │ │ ├── CommandLineImportConfig.java │ │ │ └── ImportConfig.java │ │ │ ├── reader │ │ │ ├── ChrArrayFolderComparator.java │ │ │ ├── GenomicsDBFeatureIterator.java │ │ │ ├── GenomicsDBFeatureReader.java │ │ │ ├── GenomicsDBFeatureReaderJni.java │ │ │ ├── GenomicsDBQueryStream.java │ │ │ └── GenomicsDBTimer.java │ │ │ └── spark │ │ │ ├── GenomicsDBConfiguration.java │ │ │ ├── GenomicsDBInputFormat.java │ │ │ ├── GenomicsDBInputSplit.java │ │ │ ├── GenomicsDBJavaSparkFactory.java │ │ │ ├── GenomicsDBPartitionInfo.java │ │ │ ├── GenomicsDBQueryInfo.java │ │ │ └── GenomicsDBRecordReader.java │ ├── jni │ │ ├── include │ │ │ ├── genomicsdb_GenomicsDBImporter.h │ │ │ ├── genomicsdb_GenomicsDBLibLoader.h │ │ │ ├── genomicsdb_GenomicsDBQueryStream.h │ │ │ ├── genomicsdb_GenomicsDBUtils.h │ │ │ └── jni_mpi_init.h │ │ └── src │ │ │ ├── genomicsdb_GenomicsDBImporter.cc │ │ │ ├── genomicsdb_GenomicsDBQueryStream.cc │ │ │ ├── genomicsdb_GenomicsDBUtils.cc │ │ │ └── genomicsdb_jni_init.cc │ └── scala │ │ └── com │ │ └── intel │ │ └── genomicsdb │ │ ├── GenomicsDBContext.scala │ │ ├── GenomicsDBPartition.scala │ │ ├── GenomicsDBRDD.scala │ │ └── GenomicsDBScalaSparkFactory.scala ├── resources │ ├── CMakeLists.txt │ ├── gendb_state_for_tests.txt │ ├── genomicsdb-spark-submit.sh │ ├── genomicsdb_callsets_mapping.proto │ ├── genomicsdb_coordinates.proto │ ├── 
genomicsdb_export_config.proto │ ├── genomicsdb_import_config.proto │ └── genomicsdb_vid_mapping.proto └── test │ ├── CMakeLists.txt │ ├── cpp │ ├── CMakeLists.txt │ ├── include │ │ └── test_mapping_data_loader.h │ └── src │ │ ├── CMakeLists.txt │ │ ├── main_testall.cc │ │ ├── test_mapping_data_loader.cc │ │ ├── test_multid_vector.cc │ │ └── test_non_diploid_mapper.cc │ └── java │ └── com │ └── intel │ └── genomicsdb │ ├── GenomicsDBTestUtils.java │ ├── importer │ └── GenomicsDBImporterSpec.java │ ├── model │ ├── CommandLineImportConfigSpec.java │ ├── GenomicsDBCallsetsMapProtoSpec.java │ ├── GenomicsDBExportConfigurationSpec.java │ ├── GenomicsDBImportConfigurationSpec.java │ ├── GenomicsDBVidMappingProtoSpec.java │ └── ImportConfigSpec.java │ ├── reader │ └── ChrArrayFolderComparatorSpec.java │ └── spark │ └── GenomicsDBInputFormatTest.java ├── tests ├── golden_outputs │ ├── info_ops0.vcf │ ├── info_ops1.vcf │ ├── java_genomicsdb_importer_from_vcfs_t0_1_2_multi_contig_vcf_0_18000 │ ├── java_genomicsdb_importer_from_vcfs_t0_1_2_multi_contig_vcf_12150_18000 │ ├── java_t0_1_2_vcf_at_0 │ ├── java_t0_1_2_vcf_at_12150 │ ├── java_t0_1_2_vcf_at_multiple_positions │ ├── java_t0_1_2_vcf_sites_only_at_0 │ ├── java_t6_7_8_vcf_at_0 │ ├── java_t6_7_8_vcf_at_8029500 │ ├── java_t6_7_8_vcf_at_8029501 │ ├── min_PL_spanning_deletion_load_stdout │ ├── min_PL_spanning_deletion_vcf │ ├── min_PL_spanning_deletion_vcf_no_min_PL │ ├── spark_t0_1_2_combined │ ├── spark_t0_1_2_vcf_at_0 │ ├── spark_t0_1_2_vcf_at_12100 │ ├── spark_t0_1_2_vcf_at_12150 │ ├── spark_t0_haploid_triploid_1_2_3_triploid_deletion_java_vcf │ ├── spark_t0_haploid_triploid_1_2_3_triploid_deletion_java_vcf_produce_GT │ ├── spark_t0_overlapping_at_12202 │ ├── spark_t6_7_8_vcf_at_0 │ ├── spark_t6_7_8_vcf_at_8029500 │ ├── spark_t6_7_8_vcf_at_8029500-8029500 │ ├── t0_1_2_DS_ID_calls_at_0 │ ├── t0_1_2_DS_ID_calls_at_0_phased_GT │ ├── t0_1_2_DS_ID_variants_at_0 │ ├── t0_1_2_DS_ID_variants_at_0_phased_GT │ ├── 
t0_1_2_DS_ID_vcf_at_0 │ ├── t0_1_2_all_asa_java_query_vcf │ ├── t0_1_2_all_asa_loading │ ├── t0_1_2_calls_at_0 │ ├── t0_1_2_calls_at_0_phased_GT │ ├── t0_1_2_calls_at_0_with_PL_only │ ├── t0_1_2_calls_at_12100 │ ├── t0_1_2_calls_at_12100_12141 │ ├── t0_1_2_calls_at_12100_12141_12150 │ ├── t0_1_2_calls_at_12100_12141_to_12150 │ ├── t0_1_2_calls_at_12100_12141_to_12150_12300_17384 │ ├── t0_1_2_calls_at_12150 │ ├── t0_1_2_calls_at_12150_phased_GT │ ├── t0_1_2_calls_at_multiple_positions │ ├── t0_1_2_combined │ ├── t0_1_2_loading │ ├── t0_1_2_variants_at_0 │ ├── t0_1_2_variants_at_0_phased_GT │ ├── t0_1_2_variants_at_12150 │ ├── t0_1_2_variants_at_12150_phased_GT │ ├── t0_1_2_vcf_at_0 │ ├── t0_1_2_vcf_at_0_with_FILTER │ ├── t0_1_2_vcf_at_12150 │ ├── t0_1_2_vcf_at_multiple_positions │ ├── t0_1_2_vcf_sites_only_at_0 │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_java_vcf │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_java_vcf_produce_GT │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_java_vcf_produce_GT_for_min_PL │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_java_vcf_sites_only │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_loading │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_vcf │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_vcf_produce_GT │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_vcf_produce_GT_for_min_value_PL │ ├── t0_haploid_triploid_1_2_3_triploid_deletion_vcf_sites_only │ ├── t0_overlapping │ ├── t0_overlapping_at_12202 │ ├── t0_with_missing_PL_SB_fields_t1.vcf │ ├── t0_with_missing_PL_SB_fields_t1_calls.json │ ├── t6_7_8_calls_at_0 │ ├── t6_7_8_calls_at_0_phased_GT │ ├── t6_7_8_calls_at_8029500 │ ├── t6_7_8_calls_at_8029500_phased_GT │ ├── t6_7_8_loading │ ├── t6_7_8_new_field_gatk.vcf │ ├── t6_7_8_variants_at_0 │ ├── t6_7_8_variants_at_0_phased_GT │ ├── t6_7_8_variants_at_8029500 │ ├── t6_7_8_variants_at_8029500_phased_GT │ ├── t6_7_8_vcf_at_0 │ ├── t6_7_8_vcf_at_8029500 │ ├── t6_7_8_vcf_at_8029500-8029500 │ ├── 
t6_7_8_vcf_sites_only_at_0 │ └── test_new_fields_MLEAC_only.json ├── hostfile ├── inputs │ ├── callsets │ │ ├── info_ops.json │ │ ├── min_PL_spanning_deletion.json │ │ ├── t0_1_2.csv │ │ ├── t0_1_2.json │ │ ├── t0_1_2_all_asa.json │ │ ├── t0_1_2_as_array.json │ │ ├── t0_1_2_buffer.json │ │ ├── t0_1_2_buffer_mapping.json │ │ ├── t0_1_2_combined.json │ │ ├── t0_1_2_csv.json │ │ ├── t0_haploid_triploid_1_2_3_triploid_deletion.json │ │ ├── t0_overlapping.json │ │ ├── t0_with_missing_PL_SB_fields_t1.json │ │ └── t6_7_8.json │ ├── chr1_10MB.fasta.gz │ ├── chr1_10MB.fasta.gz.fai │ ├── chr1_10MB.fasta.gz.gzi │ ├── template_vcf_header.vcf │ ├── vcfs │ │ ├── info_op0.vcf.gz │ │ ├── info_op0.vcf.gz.tbi │ │ ├── info_op1.vcf.gz │ │ ├── info_op1.vcf.gz.tbi │ │ ├── info_op2.vcf.gz │ │ ├── info_op2.vcf.gz.tbi │ │ ├── min_PL_spanning_deletion.vcf │ │ ├── min_PL_spanning_deletion.vcf.gz │ │ ├── min_PL_spanning_deletion.vcf.gz.tbi │ │ ├── t0.vcf.gz │ │ ├── t0.vcf.gz.tbi │ │ ├── t0_1_2_combined.vcf.gz │ │ ├── t0_1_2_combined.vcf.gz.tbi │ │ ├── t0_asa.vcf.gz │ │ ├── t0_asa.vcf.gz.tbi │ │ ├── t0_haploid_triploid.vcf.gz │ │ ├── t0_haploid_triploid.vcf.gz.tbi │ │ ├── t0_overlapping.vcf.gz │ │ ├── t0_overlapping.vcf.gz.tbi │ │ ├── t0_with_missing_PL_SB_fields.vcf.gz │ │ ├── t0_with_missing_PL_SB_fields.vcf.gz.tbi │ │ ├── t1.vcf.gz │ │ ├── t1.vcf.gz.tbi │ │ ├── t1_asa.vcf.gz │ │ ├── t1_asa.vcf.gz.tbi │ │ ├── t2.vcf.gz │ │ ├── t2.vcf.gz.tbi │ │ ├── t2_asa.vcf.gz │ │ ├── t2_asa.vcf.gz.tbi │ │ ├── t3_triploid_deletion.vcf.gz │ │ ├── t3_triploid_deletion.vcf.gz.tbi │ │ ├── t6.vcf.gz │ │ ├── t6.vcf.gz.tbi │ │ ├── t7.vcf.gz │ │ ├── t7.vcf.gz.tbi │ │ ├── t8.vcf.gz │ │ └── t8.vcf.gz.tbi │ ├── vid.json │ ├── vid_DS_ID.json │ ├── vid_DS_ID_phased_GT.json │ ├── vid_MLEAC_MLEAF.json │ ├── vid_all_asa.json │ ├── vid_as_array.json │ ├── vid_info_ops0.json │ ├── vid_info_ops1.json │ └── vid_phased_GT.json ├── load_genomics_metadata.sh ├── run.py └── run_spark_hdfs.py └── tools ├── CMakeLists.txt ├── 
include └── vcfdiff.h └── src ├── consolidate_tiledb_array.cc ├── create_tiledb_workspace.cc ├── gt_mpi_gather.cc ├── vcf2tiledb.cc ├── vcf_histogram.cc └── vcfdiff.cc /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.o 3 | *.a 4 | *.so 5 | *.d 6 | test_inputs 7 | my_workspace* 8 | .directory 9 | examples/bin/* 10 | examples/obj/* 11 | *temp* 12 | .project 13 | .cproject 14 | bin/* 15 | *.gcno 16 | *.gcda 17 | tests/coverage.info 18 | *.class 19 | *.jar 20 | *.iml 21 | .idea/* 22 | /target/ 23 | .tmp_loader.json 24 | build/ 25 | src/main/java/com/intel/genomicsdb/model/GenomicsDBCallsetsMapProto.java 26 | src/main/java/com/intel/genomicsdb/model/GenomicsDBExportConfiguration.java 27 | src/main/java/com/intel/genomicsdb/model/GenomicsDBImportConfiguration.java 28 | src/main/java/com/intel/genomicsdb/model/GenomicsDBVidMapProto.java 29 | src/main/java/com/intel/genomicsdb/model/Coordinates.java 30 | src/main/java/com/intel/genomicsdb/GenomicsDBCallsetsMapProto.java 31 | src/main/java/com/intel/genomicsdb/GenomicsDBExportConfiguration.java 32 | src/main/java/com/intel/genomicsdb/GenomicsDBImportConfiguration.java 33 | src/main/java/com/intel/genomicsdb/GenomicsDBVidMapProto.java 34 | 35 | #emacs 36 | *~ 37 | \#*\# 38 | .\#* 39 | TAGS 40 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dependencies/htslib"] 2 | path = dependencies/htslib 3 | url = https://github.com/Intel-HLS/htslib.git 4 | [submodule "dependencies/TileDB"] 5 | path = dependencies/TileDB 6 | url = https://github.com/Intel-HLS/TileDB.git 7 | [submodule "dependencies/RapidJSON"] 8 | path = dependencies/RapidJSON 9 | url = https://github.com/miloyip/rapidjson.git 10 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | #Adapted from http://gronlier.fr/blog/2015/01/adding-code-coverage-to-your-c-project/ 2 | sudo: required 3 | dist: trusty 4 | 5 | language: python 6 | python: "3.4" 7 | 8 | services: 9 | - postgresql 10 | 11 | env: 12 | global: 13 | - LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/dependencies/libcsv/.libs 14 | - JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 15 | - GENOMICSDB_BUILD_DIR=$TRAVIS_BUILD_DIR/build 16 | - GENOMICSDB_INSTALL_DIR=$GENOMICSDB_BUILD_DIR/install 17 | - PATH=$GENOMICSDB_INSTALL_DIR/bin:$TRAVIS_BUILD_DIR/bin:$PATH 18 | - GENOMICSDB_RELEASE_VERSION=0.10.2 19 | - CLASSPATH=$GENOMICSDB_INSTALL_DIR/bin/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-jar-with-dependencies.jar:$GENOMICSDB_BUILD_DIR/target/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-examples.jar:. 20 | 21 | install: 22 | #Install dependencies 23 | - pip install -r requirements.txt 24 | - sudo pip2 install jsondiff 25 | - sudo apt-get -y install lcov mpich zlib1g-dev libssl-dev rsync cmake uuid-dev libcurl4-openssl-dev 26 | - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y 27 | - sudo add-apt-repository -y ppa:openjdk-r/ppa 28 | - sudo apt-get update -q 29 | - sudo apt-get install g++-4.9 -y 30 | - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.9 60 31 | - sudo apt-get install libdbi-dev libdbd-pgsql -y 32 | - sudo apt-get -y install openjdk-8-jdk icedtea-plugin 33 | - jdk_switcher use openjdk8 34 | - echo $JAVA_HOME 35 | - export PATH=$JAVA_HOME/bin:$PATH 36 | - which javac && javac -version 37 | - which java && java -version 38 | #Copy protobuf binaries 39 | - wget https://github.com/Intel-HLS/GenomicsDB/releases/download/0.4.0/protobuf-3.0.2-trusty.tar.gz -O protobuf-3.0.2-trusty.tar.gz 40 | - tar xzf protobuf-3.0.2-trusty.tar.gz && sudo rsync -a protobuf-3.0.2-trusty/ /usr/ 41 | #install gtest 42 | #- sudo apt-get -y install libgtest0 libgtest-dev 43 | # above package (libgtest0) not built for 
14.04 44 | - sudo apt-get -y install libgtest-dev 45 | - cd /usr/src/gtest 46 | - sudo cmake . -DBUILD_SHARED_LIBS=1 47 | - sudo make 48 | - sudo mv libgtest* /usr/lib 49 | #Install libcsv 50 | - cd $TRAVIS_BUILD_DIR 51 | - cd dependencies && git clone https://github.com/rgamble/libcsv && cd libcsv && ./configure && make 52 | - cd $TRAVIS_BUILD_DIR 53 | # install lcov to coveralls conversion + upload tool 54 | - gem install coveralls-lcov 55 | - mkdir -p $GENOMICSDB_BUILD_DIR 56 | 57 | before_script: 58 | - cd $GENOMICSDB_BUILD_DIR && lcov --directory . --zerocounters 59 | - psql -U postgres -c 'create database gendb' 60 | - psql -U postgres gendb < $TRAVIS_BUILD_DIR/src/resources/gendb_state_for_tests.txt 61 | - psql gendb -U postgres -c 'select count(*) from reference' 62 | 63 | script: 64 | #- pylint $TRAVIS_BUILD_DIR/docker/vcf_combiner/usr/bin/combine_vcf.py 65 | - pytest $TRAVIS_BUILD_DIR/docker/vcf_combiner/usr/bin/combine_vcf.py 66 | - cd $GENOMICSDB_BUILD_DIR 67 | - cmake $TRAVIS_BUILD_DIR -DBUILD_JAVA=1 -DCMAKE_BUILD_TYPE=Coverage -DCMAKE_INSTALL_PREFIX=$GENOMICSDB_INSTALL_DIR -DLIBCSV_DIR=$TRAVIS_BUILD_DIR/dependencies/libcsv -DGENOMICSDB_RELEASE_VERSION=$GENOMICSDB_RELEASE_VERSION -DMAVEN_QUIET=True -DENABLE_LIBCURL=True 68 | - ln -s $TRAVIS_BUILD_DIR/tests 69 | - make -j 4 && make install && make test ARGS=-V 70 | 71 | after_success: 72 | - cd $GENOMICSDB_INSTALL_DIR && lcov --list tests/coverage.info # debug before upload 73 | #- coveralls-lcov --repo-token ${COVERALLS_TOKEN} tests/coverage.info # uploads to coveralls 74 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2018 Intel Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software 
without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | 23 | ------------------------------------------------------------------------------------- 24 | 25 | We use libcsv (https://sourceforge.net/projects/libcsv/) to parse CSV files. libcsv 26 | is licensed under the GNU Library or Lesser General Public License version 2.0 (LGPLv2). 27 | So, if you are re-distributing binaries or object files, they may be subject to LGPLv2 28 | terms. Please ensure that any binaries/object files you distribute are compliant with 29 | LGPLv2. You can disable libcsv usage by not setting the USE_LIBCSV and LIBCSV_DIR flags 30 | during compilation. However, your binaries/executables will not be able to import CSV 31 | files into TileDB. 32 | 33 | 34 | ------------------------------------------------------------------------------------- 35 | We use Google protocol buffers in GenomicsDB for representing the mapping information. 
36 | Quoting from the protobuf license 37 | (https://github.com/google/protobuf/blob/master/LICENSE): 38 | "Code generated by the Protocol Buffer compiler is owned by the owner of the input file 39 | used when generating it. This code is not standalone and requires a support library to be 40 | linked with it. This support library is itself covered by the above license." 41 | The .proto files and the generated C++ and Java source files are covered by the license 42 | text in lines 1-20 in this file. 43 | If you distribute the GenomicsDB binaries with the protobuf support library linked in 44 | statically, you must specify that the protobuf support library (in binary form) is covered 45 | by the Protocol Buffers license (https://github.com/google/protobuf/blob/master/LICENSE). 46 | 47 | ------------------------------------------------------------------------------------- 48 | 49 | We use libuuid (https://sourceforge.net/projects/libuuid/) which is licensed under the GNU Library 50 | or Lesser General Public License version 2.0 (LGPLv2). So, if you are re-distributing binaries or 51 | object files, they may be subject to LGPLv2 terms. Please ensure that any binaries/object files you 52 | distribute are compliant with LGPLv2. 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DISCONTINUATION OF PROJECT # 2 | This project will no longer be maintained by Intel. 3 | This project has been identified as having known security escapes. 4 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project. 5 | Intel no longer accepts patches to this project. 6 | NOTE: GenomicsDB will no longer be developed or maintained by Intel Corp. Development is continuing at a fork https://github.com/GenomicsDB/GenomicsDB (not maintained by Intel Corp). 
Version 0.10.2 is the last version of GenomicsDB released by Intel. 7 | 8 | This repo (https://github.com/Intel-HLS/GenomicsDB) will be closed at the end of 2018 (December 31, 2018). Please clone the repo and its dependencies if needed. 9 | 10 | The GenomicsDB documentation for users is hosted as a Github wiki: 11 | https://github.com/Intel-HLS/GenomicsDB/wiki 12 | 13 | ## About GenomicsDB License 14 | Copyright header in GenomicsDB license includes a range of years as 15 | ``` 16 | Copyright (c) 2016-2018 Intel Corporation 17 | ``` 18 | Every year in the range is inclusive and is a “copyrightable” year. We 19 | follow the convention as mentioned in the GNU.org [Copyright Notices.](https://www.gnu.org/prep/maintain/html_node/Copyright-Notices.html) 20 | -------------------------------------------------------------------------------- /cmake/Modules/CheckAndSetStackProtectorStrong.cmake: -------------------------------------------------------------------------------- 1 | include(CheckCXXCompilerFlag) 2 | #Check -fstack-protector-strong: append it to CMAKE_CXX_FLAGS, probe the compiler with that exact flag, and restore the original flags if it is rejected 3 | macro (CHECK_AND_SET_STACK_PROTECTOR_STRONG_FLAG STACK_PROTECTOR_STRONG_FOUND) 4 | set(BACKUP_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong") 6 | CHECK_CXX_COMPILER_FLAG(-fstack-protector-strong ${STACK_PROTECTOR_STRONG_FOUND}) 7 | if(NOT ${STACK_PROTECTOR_STRONG_FOUND}) 8 | set(CMAKE_CXX_FLAGS "${BACKUP_CMAKE_CXX_FLAGS}") 9 | endif() 10 | endmacro() 11 | -------------------------------------------------------------------------------- /cmake/Modules/CheckAndSetStdCXX2011Flag.cmake: -------------------------------------------------------------------------------- 1 | include(CheckCXXCompilerFlag) 2 | #Check C++ 2011 support 3 | macro (CHECK_AND_SET_STD_CXX_2011_FLAG CXX_STD_2011_FOUND) 4 | set(BACKUP_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 6 | CHECK_CXX_COMPILER_FLAG(-std=c++11 ${CXX_STD_2011_FOUND}) 7 | if(NOT ${CXX_STD_2011_FOUND}) 
8 | set(CMAKE_CXX_FLAGS "${BACKUP_CMAKE_CXX_FLAGS}") 9 | endif() 10 | endmacro() 11 | -------------------------------------------------------------------------------- /cmake/Modules/FindCURL.cmake: -------------------------------------------------------------------------------- 1 | # - Find curl 2 | # Find the native CURL headers and libraries. 3 | # 4 | # CURL_INCLUDE_DIRS - where to find curl/curl.h, etc. 5 | # CURL_LIBRARIES - List of libraries when using curl. 6 | # CURL_FOUND - True if curl found. 7 | # CURL_VERSION_STRING - the version of curl found (since CMake 2.8.8) 8 | 9 | #============================================================================= 10 | # Copyright 2006-2009 Kitware, Inc. 11 | # Copyright 2012 Rolf Eike Beer 12 | # 13 | # Distributed under the OSI-approved BSD License (the "License"); 14 | # see accompanying file Copyright.txt for details. 15 | # 16 | # This software is distributed WITHOUT ANY WARRANTY; without even the 17 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 18 | # See the License for more information. 19 | #============================================================================= 20 | # (To distribute this file outside of CMake, substitute the full 21 | # License text for the above reference.) 22 | 23 | # Look for the header file. 24 | find_path(CURL_INCLUDE_DIR NAMES curl/curl.h HINTS ${CURL_PREFIX_DIR} ${CURL_PREFIX_DIR}/include) 25 | mark_as_advanced(CURL_INCLUDE_DIR) 26 | 27 | # Look for the library (sorted from most current/relevant entry to least). 28 | find_library(CURL_LIBRARY NAMES 29 | curl 30 | # Windows MSVC prebuilts: 31 | curllib 32 | libcurl_imp 33 | curllib_static 34 | # Windows older "Win32 - MSVC" prebuilts (libcurl.lib, e.g. 
libcurl-7.15.5-win32-msvc.zip): 35 | libcurl 36 | HINTS ${CURL_PREFIX_DIR} ${CURL_PREFIX_DIR}/lib64 ${CURL_PREFIX_DIR}/lib 37 | ) 38 | find_library(CURL_STATIC_LIBRARY NAMES libcurl.a 39 | HINTS ${CURL_PREFIX_DIR} ${CURL_PREFIX_DIR}/lib64 ${CURL_PREFIX_DIR}/lib) 40 | mark_as_advanced(CURL_LIBRARY) 41 | 42 | if(CURL_INCLUDE_DIR) 43 | foreach(_curl_version_header curlver.h curl.h) 44 | if(EXISTS "${CURL_INCLUDE_DIR}/curl/${_curl_version_header}") 45 | file(STRINGS "${CURL_INCLUDE_DIR}/curl/${_curl_version_header}" curl_version_str REGEX "^#define[\t ]+LIBCURL_VERSION[\t ]+\".*\"") 46 | 47 | string(REGEX REPLACE "^#define[\t ]+LIBCURL_VERSION[\t ]+\"([^\"]*)\".*" "\\1" CURL_VERSION_STRING "${curl_version_str}") 48 | unset(curl_version_str) 49 | break() 50 | endif() 51 | endforeach() 52 | endif() 53 | 54 | # handle the QUIETLY and REQUIRED arguments and set CURL_FOUND to TRUE if 55 | # all listed variables are TRUE 56 | include(FindPackageHandleStandardArgs) 57 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(CURL 58 | REQUIRED_VARS CURL_LIBRARY CURL_INCLUDE_DIR 59 | VERSION_VAR CURL_VERSION_STRING) 60 | 61 | if(CURL_FOUND) 62 | set(CURL_LIBRARIES ${CURL_LIBRARY}) 63 | set(CURL_INCLUDE_DIRS ${CURL_INCLUDE_DIR}) 64 | endif() 65 | -------------------------------------------------------------------------------- /cmake/Modules/FindGPerftools.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for gperftools 2 | # Once done this will define 3 | # GPERFTOOLS_FOUND - gperftools found 4 | 5 | find_path(GPERFTOOLS_INCLUDE_DIR NAMES gperftools/profiler.h HINTS "${GPERFTOOLS_DIR}" "${GPERFTOOLS_DIR}/include") 6 | find_library(GPERFTOOLS_PROFILER_LIBRARY NAMES profiler HINTS "${GPERFTOOLS_DIR}" "${GPERFTOOLS_DIR}/lib64" "${GPERFTOOLS_DIR}/lib") 7 | include(FindPackageHandleStandardArgs) 8 | find_package_handle_standard_args(gperftools "Could not find gperftools headers and/or libraries ${DEFAULT_MSG}" GPERFTOOLS_INCLUDE_DIR 
GPERFTOOLS_PROFILER_LIBRARY) 9 | -------------------------------------------------------------------------------- /cmake/Modules/FindIPP.cmake: -------------------------------------------------------------------------------- 1 | # Determine path to IPP libraries for Zlib 2 | # Once done this will define 3 | # IPP_FOUND - IPP found 4 | 5 | find_library(LIBIPPDC_LIBRARY NAMES libippdc.a ippdc HINTS "${IPPROOT}/lib/intel64/") 6 | find_library(LIBIPPS_LIBRARY NAMES libipps.a ipps HINTS "${IPPROOT}/lib/intel64/") 7 | find_library(LIBIPPCORE_LIBRARY NAMES libippcore.a ippcore HINTS "${IPPROOT}/lib/intel64/") 8 | include(FindPackageHandleStandardArgs) 9 | find_package_handle_standard_args(IPP "Could not find IPP libraries ${DEFAULT_MSG}" LIBIPPDC_LIBRARY LIBIPPS_LIBRARY LIBIPPCORE_LIBRARY) 10 | -------------------------------------------------------------------------------- /cmake/Modules/FindOpenMPv4.cmake: -------------------------------------------------------------------------------- 1 | # Determine if OpenMP specification v4 is supported by the C/C++ compiler 2 | # Once done this will define 3 | # OPENMPV4_FOUND - OpenMP v4 found 4 | # OpenMP_C_FLAGS 5 | # OpenMP_CXX_FLAGS 6 | 7 | include(CheckCSourceCompiles) 8 | find_package(OpenMP QUIET) 9 | set(OpenMPv4_C_TEST_SOURCE 10 | " 11 | #include 12 | #include 13 | 14 | /*Define custom reduction operation*/ 15 | int main() 16 | { 17 | int i = 0; 18 | int A[10]; 19 | int sum = 0; 20 | #pragma omp declare reduction ( sum_up : int : omp_out += omp_in ) initializer(omp_priv = 0) 21 | #pragma omp parallel for default(shared) reduction(sum_up : sum) 22 | for(i=0;i<10;++i) 23 | sum += A[i]; 24 | return 0; 25 | } 26 | ") 27 | 28 | if(NOT OPENMP_FOUND) 29 | set(OPENMPV4_FOUND False) 30 | else() 31 | set(SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") 32 | set(CMAKE_REQUIRED_FLAGS "${OpenMP_C_FLAGS}") 33 | check_c_source_compiles("${OpenMPv4_C_TEST_SOURCE}" OPENMPV4_FOUND) 34 | set(CMAKE_REQUIRED_FLAGS 
"${SAFE_CMAKE_REQUIRED_FLAGS}") 35 | endif() 36 | -------------------------------------------------------------------------------- /cmake/Modules/FindProtobufWrapper.cmake: -------------------------------------------------------------------------------- 1 | # Following the standard CMake FindProtobuf module 2 | # Determine compiler flags for protobuf 3 | # Once done this will define 4 | # PROTOBUF_LIBRARY_FOUND - protobuf found 5 | 6 | find_path(PROTOBUF_INCLUDE_DIRS NAMES google/protobuf/service.h PATHS "${PROTOBUF_INCLUDE_DIR}" "${PROTOBUF_LIBRARY}/include") 7 | if(PROTOBUF_STATIC_LINKING OR BUILD_DISTRIBUTABLE_LIBRARY) 8 | find_library(PROTOBUF_LIBRARIES NAMES libprotobuf.a protobuf PATHS "${PROTOBUF_LIBRARY}" "${PROTOBUF_LIBRARY}/lib64" "${PROTOBUF_LIBRARY}/lib") 9 | else() 10 | find_library(PROTOBUF_LIBRARIES NAMES protobuf PATHS "${PROTOBUF_LIBRARY}" "${PROTOBUF_LIBRARY}/lib64" "${PROTOBUF_LIBRARY}/lib") 11 | endif() 12 | include(FindPackageHandleStandardArgs) 13 | find_package_handle_standard_args(Protobuf "Could not find Protobuf headers and/or libraries\n${DEFAULT_MSG}" PROTOBUF_INCLUDE_DIRS PROTOBUF_LIBRARIES) 14 | find_program(PROTOBUF_PROTOC_EXECUTABLE NAMES protoc PATHS "${PROTOBUF_PROTOC_EXECUTABLE}" "${PROTOBUF_LIBRARY}" "${PROTOBUF_LIBRARY}/bin") 15 | if(PROTOBUF_FOUND) 16 | set(PROTOBUF_LIBRARY ${PROTOBUF_LIBRARIES}) 17 | set(PROTOBUF_INCLUDE_DIR ${PROTOBUF_INCLUDE_DIRS}) 18 | endif() 19 | include(FindProtobuf) 20 | -------------------------------------------------------------------------------- /cmake/Modules/FindRapidJSON.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for RapidJSON 2 | # Once done this will define 3 | # RAPIDJSON_FOUND - RapidJSON found 4 | # RapidJSON_C_FLAGS 5 | # RapidJSON_CXX_FLAGS 6 | 7 | #If specified by user 8 | if(RAPIDJSON_INCLUDE_DIR) 9 | set(RAPIDJSON_INCLUDE_DIR_HINT "${RAPIDJSON_INCLUDE_DIR}") 10 | unset(RAPIDJSON_INCLUDE_DIR CACHE) 11 | endif() 
12 | find_path(RAPIDJSON_INCLUDE_DIR NAMES rapidjson/rapidjson.h HINTS "${RAPIDJSON_INCLUDE_DIR_HINT}" 13 | PATHS "${CMAKE_SOURCE_DIR}/dependencies/RapidJSON/include" CMAKE_FIND_ROOT_PATH_BOTH) 14 | include(FindPackageHandleStandardArgs) 15 | find_package_handle_standard_args(RapidJSON "Could not find RapidJSON header: rapidjson/rapidjson.h - specify the variable RAPIDJSON_INCLUDE_DIR to point to the directory /include" RAPIDJSON_INCLUDE_DIR) 16 | -------------------------------------------------------------------------------- /cmake/Modules/FindTileDB.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for TileDB 2 | # Once done this will define 3 | # TILEDB_FOUND - TileDB found 4 | 5 | #Disable Master Catalog in TileDB 6 | set(ENABLE_MASTER_CATALOG False CACHE BOOL "Disable master catalog") 7 | set(TILEDB_VERBOSE True) 8 | 9 | #Zlib 10 | find_package(ZLIB REQUIRED) 11 | 12 | #OpenSSL 13 | if(OPENSSL_PREFIX_DIR AND NOT OPENSSL_ROOT_DIR) 14 | set(OPENSSL_ROOT_DIR "${OPENSSL_PREFIX_DIR}") 15 | endif() 16 | if(BUILD_DISTRIBUTABLE_LIBRARY) 17 | set(OPENSSL_USE_STATIC_LIBS True) 18 | endif() 19 | find_package(OpenSSL REQUIRED) #now performed inside TileDB 20 | 21 | #libuuid 22 | find_package(libuuid REQUIRED) 23 | 24 | include(FindPackageHandleStandardArgs) 25 | 26 | #Build if TileDB source directory specified 27 | if(TILEDB_SOURCE_DIR) 28 | #OpenMP 29 | if(DISABLE_OPENMP) 30 | set(USE_OPENMP False CACHE BOOL "Disable OpenMP" FORCE) 31 | else() 32 | set(USE_OPENMP True CACHE BOOL "Enable OpenMP" FORCE) 33 | endif() 34 | 35 | find_path(TILEDB_INCLUDE_DIR NAMES tiledb.h HINTS "${TILEDB_SOURCE_DIR}/core/include/c_api") 36 | find_package_handle_standard_args(TileDB "Could not find TileDB headers ${DEFAULT_MSG}" TILEDB_INCLUDE_DIR) 37 | add_subdirectory(${TILEDB_SOURCE_DIR} EXCLUDE_FROM_ALL) 38 | else() 39 | find_path(TILEDB_INCLUDE_DIR NAMES "tiledb.h" HINTS "${TILEDB_INSTALL_DIR}/include") 40 | 
find_library(TILEDB_LIBRARY NAMES libtiledb.a tiledb HINTS "${TILEDB_INSTALL_DIR}" "${TILEDB_INSTALL_DIR}/lib64" "${TILEDB_INSTALL_DIR}/lib") 41 | find_package_handle_standard_args(TileDB "Could not find TileDB headers and/or libraries ${DEFAULT_MSG}" TILEDB_INCLUDE_DIR TILEDB_LIBRARY) 42 | endif() 43 | -------------------------------------------------------------------------------- /cmake/Modules/Findhtslib.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for htslib 2 | # Once done this will define 3 | # HTSLIB_FOUND - htslib found 4 | 5 | include(FindPackageHandleStandardArgs) 6 | #Build if htslib source directory specified 7 | if(HTSLIB_SOURCE_DIR) 8 | set(HTSLIB_Debug_CFLAGS "-Wall -fPIC -DDEBUG -g3 -gdwarf-3") #will be picked if compiling in debug mode 9 | set(HTSLIB_Coverage_CFLAGS "${HTSLIB_Debug_CFLAGS}") 10 | set(HTSLIB_Release_CFLAGS " -Wall -fPIC -O3") 11 | set(HTSLIB_Debug_LDFLAGS "-g3 -gdwarf-3") 12 | set(HTSLIB_Coverage_LDFLAGS "${HTSLIB_Debug_LDFLAGS}") 13 | set(HTSLIB_Release_LDFLAGS "") 14 | include(ExternalProject) 15 | if(APPLE AND BUILD_DISTRIBUTABLE_LIBRARY) 16 | set(HTSLIB_EXTRA_CFLAGS -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}) 17 | endif() 18 | #Cross compiling for MacOSX 19 | if((NOT (CMAKE_SYSTEM_NAME STREQUAL CMAKE_HOST_SYSTEM_NAME)) AND APPLE) 20 | set(HTSLIB_OSXCROSS_COMPILE_FLAGS LIBS=${OSXCROSS_LIBS} CPPFLAGS=${OSXCROSS_CPPFLAGS} --host=${CMAKE_SYSTEM_PROCESSOR}-${CMAKE_SYSTEM}) 21 | endif() 22 | if(CURL_FOUND) 23 | set(HTSLIB_CURL_FLAGS --enable-libcurl --enable-gcs --enable-s3) 24 | set(HTSLIB_${CMAKE_BUILD_TYPE}_CFLAGS "${HTSLIB_${CMAKE_BUILD_TYPE}_CFLAGS} -I${CURL_INCLUDE_DIRS}") 25 | get_filename_component(LIBCURL_DIR ${CURL_LIBRARIES} DIRECTORY) 26 | set(HTSLIB_LIBS "-L${LIBCURL_DIR} -lcurl") 27 | else() 28 | set(HTSLIB_CURL_FLAGS --disable-libcurl) 29 | endif() 30 | ExternalProject_Add( 31 | htslib 32 | DOWNLOAD_COMMAND "" 33 | SOURCE_DIR 
"${HTSLIB_SOURCE_DIR}" 34 | UPDATE_COMMAND "autoreconf" 35 | PATCH_COMMAND "" 36 | CONFIGURE_COMMAND ${HTSLIB_SOURCE_DIR}/configure CFLAGS=${HTSLIB_${CMAKE_BUILD_TYPE}_CFLAGS} LDFLAGS=${HTSLIB_${CMAKE_BUILD_TYPE}_LDFLAGS} 37 | CC=${CMAKE_C_COMPILER} AR=${CMAKE_AR} RANLIB=${CMAKE_RANLIB} 38 | ${HTSLIB_OSXCROSS_COMPILE_FLAGS} 39 | LIBS=${HTSLIB_LIBS} 40 | --disable-lzma --disable-bz2 41 | ${HTSLIB_CURL_FLAGS} 42 | BUILD_COMMAND ${CMAKE_COMMAND} -E make_directory cram 43 | COMMAND ${CMAKE_COMMAND} -E make_directory test 44 | COMMAND ${CMAKE_COMMAND} -E copy ${HTSLIB_SOURCE_DIR}/version.sh . 45 | COMMAND $(MAKE) -f ${HTSLIB_SOURCE_DIR}/Makefile VPATH=${HTSLIB_SOURCE_DIR} SOURCE_DIR=${HTSLIB_SOURCE_DIR} 46 | AR=${CMAKE_AR} 47 | #BUILD_IN_SOURCE 1 48 | INSTALL_COMMAND "" 49 | ) 50 | find_path(HTSLIB_INCLUDE_DIR NAMES htslib/vcf.h HINTS "${HTSLIB_SOURCE_DIR}" CMAKE_FIND_ROOT_PATH_BOTH) 51 | ExternalProject_Get_Property(htslib BINARY_DIR) 52 | set(HTSLIB_DIR_IN_BUILD_DIR "${BINARY_DIR}") 53 | set(HTSLIB_LIBRARY "${BINARY_DIR}/libhts.a") 54 | find_package_handle_standard_args(htslib "Could not find htslib headers ${DEFAULT_MSG}" HTSLIB_INCLUDE_DIR) 55 | else() 56 | find_path(HTSLIB_INCLUDE_DIR NAMES htslib/vcf.h HINTS "${HTSLIB_INSTALL_DIR}") 57 | find_library(HTSLIB_LIBRARY NAMES libhts.a hts HINTS "${HTSLIB_INSTALL_DIR}") 58 | find_package_handle_standard_args(htslib "Could not find htslib headers and/or libraries ${DEFAULT_MSG}" HTSLIB_INCLUDE_DIR HTSLIB_LIBRARY) 59 | endif() 60 | -------------------------------------------------------------------------------- /cmake/Modules/Findlibcsv.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for libcsv 2 | # Once done this will define 3 | # LIBCSV_FOUND - libcsv found 4 | 5 | find_path(LIBCSV_INCLUDE_DIR NAMES csv.h HINTS "${LIBCSV_DIR}/include" "${LIBCSV_DIR}") 6 | find_library(LIBCSV_LIBRARY NAMES csv HINTS "${LIBCSV_DIR}/lib" "${LIBCSV_DIR}/.libs" 
"${LIBCSV_DIR}") 7 | include(FindPackageHandleStandardArgs) 8 | find_package_handle_standard_args(libcsv "Could not find libcsv headers and/or libraries ${DEFAULT_MSG}" LIBCSV_INCLUDE_DIR LIBCSV_LIBRARY) 9 | -------------------------------------------------------------------------------- /cmake/Modules/Findlibdbi.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for libdbi 2 | # Once done this will define 3 | # LIBDBI_FOUND - libdbi found 4 | 5 | find_path(LIBDBI_INCLUDE_DIR NAMES dbi/dbi.h HINTS "${LIBDBI_DIR}/include") 6 | 7 | find_library(LIBPGSQL_DRIVER_LIBRARY NAMES dbdpgsql HINTS "${LIBDBI_DIR}/lib/dbd") 8 | 9 | find_library(LIBDBI_DEV_LIBRARY NAMES dbi HINTS "${LIBDBI_DIR}/lib") 10 | 11 | include(FindPackageHandleStandardArgs) 12 | 13 | find_package_handle_standard_args(libdbi "Could not find libdbi headers and/or libraries ${DEFAULT_MSG}" LIBDBI_INCLUDE_DIR LIBDBI_DEV_LIBRARY LIBPGSQL_DRIVER_LIBRARY) 14 | 15 | if(LIBDBI_FOUND) 16 | include(CheckCSourceCompiles) 17 | file(READ ${CMAKE_SOURCE_DIR}/cmake/Modules/libdbi_test_program.c LIBDBI_C_TEST_SOURCE) 18 | set(SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") 19 | set(CMAKE_REQUIRED_FLAGS "-I${LIBDBI_INCLUDE_DIR}") 20 | set(SAFE_CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") 21 | set(CMAKE_REQUIRED_LIBRARIES ${LIBPGSQL_DRIVER_LIBRARY} ${LIBDBI_DEV_LIBRARY}) 22 | check_c_source_compiles("${LIBDBI_C_TEST_SOURCE}" LIBDBI_TEST_PROGRAM_COMPILES) 23 | if(NOT LIBDBI_TEST_PROGRAM_COMPILES) 24 | message(STATUS "libdbi headers and libraries found; however, test program fails to compile. 
GenomicsDB requires libdbi >= 0.9.0.") 25 | message(STATUS "Mapping DB support disabled") 26 | unset(LIBDBI_FOUND) 27 | endif() 28 | set(CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}") 29 | set(CMAKE_REQUIRED_LIBRARIES "${SAFE_CMAKE_REQUIRED_LIBRARIES}") 30 | endif() 31 | -------------------------------------------------------------------------------- /cmake/Modules/Findlibuuid.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for libuuid 2 | # Once done this will define 3 | # LIBUUID_FOUND - libuuid found 4 | 5 | find_path(LIBUUID_INCLUDE_DIR NAMES uuid/uuid.h HINTS "${LIBUUID_DIR}/include" "${LIBUUID_DIR}") 6 | 7 | if(BUILD_DISTRIBUTABLE_LIBRARY) 8 | find_library(LIBUUID_LIBRARY NAMES libuuid.a uuid HINTS "${LIBUUID_DIR}/lib64" "${LIBUUID_DIR}/lib" "${LIBUUID_DIR}") 9 | else() 10 | find_library(LIBUUID_LIBRARY NAMES uuid HINTS "${LIBUUID_DIR}/lib64" "${LIBUUID_DIR}/lib" "${LIBUUID_DIR}") 11 | endif() 12 | 13 | include(FindPackageHandleStandardArgs) 14 | find_package_handle_standard_args(libuuid "Could not find libuuid headers and/or libraries ${DEFAULT_MSG}" LIBUUID_INCLUDE_DIR LIBUUID_LIBRARY) 15 | -------------------------------------------------------------------------------- /cmake/Modules/Findsafestringlib.cmake: -------------------------------------------------------------------------------- 1 | # Determine compiler flags for safestringlib 2 | # Once done this will define 3 | # SAFESTRINGLIB_FOUND - safestringlib found 4 | 5 | find_path(SAFESTRINGLIB_INCLUDE_DIR NAMES safe_mem_lib.h HINTS "${SAFESTRINGLIB_DIR}/include") 6 | find_library(SAFESTRINGLIB_LIBRARY NAMES libsafestring.a safestring HINTS "${SAFESTRINGLIB_DIR}/lib64" 7 | "${SAFESTRINGLIB_DIR}/lib" "${SAFESTRINGLIB_DIR}") 8 | 9 | include(FindPackageHandleStandardArgs) 10 | find_package_handle_standard_args(safestringlib "Could not find safestring headers and/or libraries ${DEFAULT_MSG}" SAFESTRINGLIB_INCLUDE_DIR 11 | SAFESTRINGLIB_LIBRARY)
12 | -------------------------------------------------------------------------------- /cmake/Modules/libdbi_test_program.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | dbi_conn conn; 6 | dbi_result result; 7 | dbi_inst instance; 8 | 9 | dbi_initialize_r(NULL, &instance); 10 | conn = dbi_conn_new_r("pgsql", instance); 11 | 12 | dbi_conn_set_option(conn, "host", "localhost"); 13 | 14 | if (dbi_conn_connect(conn) < 0) 15 | exit(0); 16 | else 17 | { 18 | result = dbi_conn_query(conn, "SELECT * from table"); 19 | if (result) { 20 | while (dbi_result_next_row(result)) { 21 | unsigned idnumber = dbi_result_get_uint(result, "id"); 22 | } 23 | dbi_result_free(result); 24 | } 25 | dbi_conn_close(conn); 26 | } 27 | dbi_shutdown_r(instance); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /contrib/README.md: -------------------------------------------------------------------------------- 1 | # External Contributions 2 | GenomicsDB is released under the MIT License. Hence, we expect the external 3 | contributors to grant an MIT License for their contributions. We have adopted 4 | the Developer Certificate of Origin from the Linux project, and request the 5 | developer to include a 'Signed-off-by' line in the commit message to indicate 6 | they understand and agree to the DCO. 7 | 8 | Signed-off-by: First Last developeremail@example.com (github: 9 | developer_githubid) 10 | 11 | ## Checklist before creating Pull Request 12 | Please ensure the following before creating a pull request 13 | 14 | 1. Code is well documented in javadoc style 15 | 2. 
Include a README.md in markdown so that we can add it to Wiki 16 | - Clearly state what is the key contribution and what problem it solves 17 | - How are users going to build and run the tool 18 | - How to handle dependencies, if any 19 | - Known errors and troubleshooting 20 | -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest ~= 3.0.7 2 | pylint ~= 1.7.1 3 | 4 | -------------------------------------------------------------------------------- /docker/vcf_combiner/Dockerfile: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # Copyright (c) 2016-2017 Intel Corporation 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | # this software and associated documentation files (the "Software"), to deal in 6 | # the Software without restriction, including without limitation the rights to 7 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | # the Software, and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # The code utilized some features of RedHat pre-built container 22 | FROM centos:7 23 | 24 | LABEL vendor="Intel Corporation" name="VCF Combiner" description="Combine VCF files into a single VCF file" 25 | 26 | RUN yum install -y --setopt=tsflags=nodocs epel-release && \ 27 | yum repolist && \ 28 | yum install -y python34.x86_64 && \ 29 | yum install -y python34-setuptools && \ 30 | easy_install-3.4 pip && \ 31 | yum install -y --setopt=tsflags=nodocs libcsv mpich openssl zlib unzip.x86_64 && \ 32 | yum clean all && \ 33 | pip3 install PyVCF 34 | 35 | ADD ./usr /usr 36 | ADD ./etc /etc 37 | ADD ./root /root 38 | COPY tmp/vcf2tiledb /usr/bin/ 39 | 40 | WORKDIR /tmp 41 | 42 | ENV BASH_ENV=/etc/profile.d/cont-env.sh HOME=/home/default PATH=$PATH:/usr/lib64/mpich/bin 43 | 44 | RUN groupadd -r default -f -g 5658 && \ 45 | useradd -u 5658 -g default -o -c "Default User" default -s /sbin/nologin 46 | 47 | #USER default 48 | 49 | ENTRYPOINT ["/usr/bin/container-entrypoint"] 50 | 51 | CMD ["container-usage"] 52 | -------------------------------------------------------------------------------- /docker/vcf_combiner/etc/profile.d/cont-env.sh: -------------------------------------------------------------------------------- 1 | source /usr/share/cont-lib/cont-env.sh 2 | -------------------------------------------------------------------------------- /docker/vcf_combiner/root/.bashrc: -------------------------------------------------------------------------------- 1 | source /usr/share/cont-lib/cont-env.sh 2 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/bin/container-entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | . /usr/share/cont-lib/cont-lib.sh 3 | 4 | ls /usr/bin/vcf2tiledb > /dev/null 2>&1 5 | test $? -ne 0 && cont_error "cannot find vcf2tiledb... 
exiting" && exit 2 6 | 7 | if [ "$#" -gt 0 -a "$1" == "--version" ]; then 8 | __cont_msg "info vcf2tiledb version is $(vcf2tiledb --version)" 9 | exit 0 10 | fi 11 | 12 | my_uid=$(id -u) 13 | test -z "$*" && set -- bash 14 | if [ "$1" == "combine_vcf" ]; then 15 | __cont_msg "info run combine_vcf as user id $my_uid, vcf2tiledb version is $(vcf2tiledb --version)" 16 | echo 17 | fi 18 | 19 | if [ $my_uid == 5658 ]; then 20 | cd $HOME 21 | else 22 | export HOME=/ 23 | fi 24 | 25 | exec "$@" 26 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/bin/container-usage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # TODO: support API for executable help scripts 4 | cat /usr/share/cont-docs/*.txt 2>/dev/null 5 | 6 | 7 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-docs/README: -------------------------------------------------------------------------------- 1 | Files '*.txt' are automatically read and added to 'container-usage' output. 2 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-docs/combine_vcf_help.txt: -------------------------------------------------------------------------------- 1 | 2 | HOW TO generate a combined VCF file using docker image vcf_combiner 3 | ---------------------------------------------------------------------- 4 | 5 | The command that combines VCF files is combine_vcf.
The usage: 6 | 7 | combine_vcf -R /container_path/to/ref_file -o /container_path/output/combined.vcf.gz -i /container_path/tn.list 8 | OR 9 | combine_vcf -R /container_path/to/ref_file -o /container_path/output/combined.vcf.gz -i /container_path/vcf_file_1,...,/container_path/vcf_file_n 10 | 11 | where -o : path to output file name 12 | -i : either a file listing the vcf files or vcf files separated by ',' 13 | -R : reference file 14 | -c : optional, callset file 15 | 16 | We can combine VCF files at host command line or at docker container command line. 17 | 18 | --- Examples of running at host --- 19 | RUN with -i a_list_of_vcf_files 20 | _HOST$ sudo docker run -it -v /host_docker_data/:/data/ -v /host_docker_reference/:/ref/ vcf_combiner combine_vcf -R /ref/Homo_sapiens_assembly19.fasta -o /data/output/combined.vcf.gz -i /data/tn.list 21 | OR RUN with -i vcf_files ... 22 | _HOST$ sudo docker run -it -v /host_docker_data/:/data/ -v /host_docker_reference/:/ref/ vcf_combiner combine_vcf -R /ref/Homo_sapiens_assembly19.fasta -o /data/output/combined.vcf.gz -i /data/vcfs/info_op1.vcf.gz,/data/vcfs/t7.vcf.gz,/data/vcfs/t1.vcf.gz 23 | 24 | --- Examples of running in docker container --- 25 | CREATE a docker container with bash command 26 | _HOST$ sudo docker run -it -v /host_docker_data/:/data/ -v /host_docker_reference/:/ref/ vcf_combiner bash 27 | 28 | RUN with -i a_list_of_vcf_files 29 | _DOCKER$ combine_vcf -R /ref/Homo_sapiens_assembly19.fasta -o /data/output/combined.vcf.gz -i /data/tn.list 30 | OR RUN with -i vcf_files ...
31 | _DOCKER$ combine_vcf -R /ref/Homo_sapiens_assembly19.fasta -o /data/output/combined.vcf.2.gz -i /data/vcfs/info_op1.vcf.gz,/data/vcfs/t7.vcf.gz,/data/vcfs/t1.vcf.gz 32 | 33 | --- The tn.list in above Examples --- 34 | /data/vcfs/info_op1.vcf.gz 35 | /data/vcfs/t7.vcf.gz 36 | /data/vcfs/t1.vcf.gz 37 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-entry/cont-env.sh: -------------------------------------------------------------------------------- 1 | source /usr/share/cont-lib/cont-env.sh 2 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-layer/README: -------------------------------------------------------------------------------- 1 | This directory is supposed to be filled by layers above the actual image. 2 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-lib/autoload/README: -------------------------------------------------------------------------------- 1 | All '*.sh' files in this directory will be automatically sourced together with 2 | cont-lib.sh script. 3 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-lib/autoload/genomicsdb-env.sh: -------------------------------------------------------------------------------- 1 | if [ $(id -u) == 5658 ]; then 2 | export HOME=/home/default 3 | else 4 | export HOME=/ 5 | fi 6 | export PATH=$PATH:/usr/lib64/mpich/bin 7 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-lib/cont-env.sh: -------------------------------------------------------------------------------- 1 | . 
/usr/share/cont-lib/cont-lib.sh 2 | 3 | cont_debug "changing environment variables" 4 | 5 | cont_source_hooks env common 6 | 7 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-lib/cont-lib.sh: -------------------------------------------------------------------------------- 1 | __cont_source_scripts() 2 | { 3 | local i 4 | local dir="$1" 5 | for i in "$dir"/*.sh; do 6 | if test -r "$i"; then 7 | . "$i" 8 | fi 9 | done 10 | } 11 | 12 | 13 | # CONT_SOURCE_HOOKS HOOKDIR [PROJECT] 14 | # ----------------------------------- 15 | # Source '*.sh' files from the following directories (in this order): 16 | # a. /usr/share/cont-layer/PROJECT/HOOK/ 17 | # b. /usr/share/cont-volume/PROJECT/HOOK/ 18 | # 19 | # The PROJECT argument is optional because it may be set globally by 20 | # $CONT_PROJECT environment variable. The need for PROJECT argument is 21 | # basically to push people to install scripts into their own directories, 22 | # which will allow easier multi-project containers maintenance.
23 | cont_source_hooks() 24 | { 25 | local i dir 26 | local hook="$1" 27 | local project="$CONT_PROJECT" 28 | local dir 29 | 30 | test -z "$hook" && return 31 | test -n "$2" && project="$2" 32 | 33 | for dir in /usr/share/cont-layer /usr/share/cont-volume; do 34 | dir="$dir/$project/$hook" 35 | cont_debug2 "loading scripts from $dir" 36 | __cont_source_scripts "$dir" 37 | done 38 | } 39 | 40 | __cont_msg() 41 | { 42 | echo "$*" >&2 43 | } 44 | 45 | 46 | __cont_dbg() 47 | { 48 | test -z "$CONT_DEBUG" && CONT_DEBUG=0 49 | test "$CONT_DEBUG" -lt "$1" && return 50 | local lvl="$1" 51 | shift 52 | __cont_msg "debug_$lvl: $*" 53 | } 54 | 55 | 56 | cont_warn() { __cont_msg "warn: $*" ; } 57 | cont_error() { __cont_msg "error: $*"; } 58 | cont_debug() { __cont_dbg 1 "$*" ; } 59 | cont_debug2() { __cont_dbg 2 "$*" ; } 60 | cont_debug3() { __cont_dbg 3 "$*" ; } 61 | 62 | 63 | __cont_encode_env() 64 | { 65 | local i 66 | for i in $1 67 | do 68 | eval local val="\$$i" 69 | printf ": \${%s=%q}\n" "$i" "$val" 70 | done 71 | } 72 | 73 | 74 | # CONT_STORE_ENV VARIABLES FILENAME 75 | # --------------------------------- 76 | # Create source-able script conditionally setting specified VARIABLES by 77 | # inheriting the values from current environment; create the file on path 78 | # FILENAME. Already existing variables will not be changed by sourcing the 79 | # resulting script. The argument VARIABLES expects a list of space separated 80 | # variable names.
81 | # 82 | # Usage: 83 | # $ my_var=my_value 84 | # $ my_var2="my value2" 85 | # $ cont_store_env "my_var my_var2" ~/.my-environment 86 | # $ cat ~/.my-environment 87 | # : ${my_var=my_value} 88 | # : ${my_var2=my\ value2} 89 | cont_store_env() 90 | { 91 | cont_debug "creating env file '$2'" 92 | __cont_encode_env "$1" > "$2" \ 93 | || cont_warn "can't store environment $1 into $2 file" 94 | } 95 | 96 | 97 | __cont_source_scripts "/usr/share/cont-lib/autoload" 98 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-lib/parser-simple-config.sh: -------------------------------------------------------------------------------- 1 | . "/usr/share/cont-lib/cont-lib.sh" 2 | 3 | # CONT_SEMICOLON_SPLIT VAR 4 | # ------------------------ 5 | # Split the contents of string variable VAR into list of strings (on separate 6 | # line), each of those strings will be printed to standard output. The ';' and 7 | # newline characters are used as split separators. You can use quadrigraph @.,@ 8 | # for ';' character not splitting the VAR (and use @&t@ to expand into empty 9 | # string). More info about quadrigraphs may be found in autoconf info page. 10 | cont_semicolon_split() 11 | { 12 | eval set -- "\"\$$1\"" 13 | test x = x"$1" && return 0 14 | 15 | echo "$1" \ 16 | | sed \ 17 | -e 's/[[:space:]]*;[[:space:]]*/\n/g' \ 18 | -e 's/^[[:space:]]*//g' \ 19 | -e 's/\([^\\]\)[[:space:]]*$/\1/g' \ 20 | | sed \ 21 | -e 's|@.,@|;|g' \ 22 | -e 's|@&t@||g' 23 | } 24 | 25 | # CONT_PARSER_SIMPLE_CONFIG CONFIG_VAR CALLBACK [ARGS] 26 | # ---------------------------------------------------- 27 | # Parse contents of variable of name CONFIG_VAR, call CALLBACK function (or 28 | # command) for each parsed configuration option. 29 | # 30 | # The format of configuration file is: 31 | # 32 | # <key> = <value> [; <key> = <value> ...] 33 | # 34 | # Content of <key> is not limited, but keep it sane please (lets say we support 35 | # the C syntax of identifiers).
The <value> must be single-line string. Should 36 | # the <value> contain ';' or '\' character, it must be escaped by '\'. 37 | # 38 | # The semantics of CALLBACK you *must* provide is: 39 | # 40 | # CALLBACK KEY VALUE [ARGS] 41 | # ------------------------- 42 | # KEY and VALUE are strings with parsed result. ARGS is additional payload 43 | # you may provide during CONT_PARSER_SIMPLE_CONFIG call. 44 | # 45 | # Example of usage: 46 | # 47 | # $ cat script.sh 48 | # callback() 49 | # { 50 | # local var="$1" val="$2" 51 | # shift 2 52 | # test -n "$*" && local payload=" [$*]" 53 | # echo "$var=$val$payload" 54 | # } 55 | # config='URL = "http://example.com"; semicolon = "@.,@"' 56 | # cont_parser_simple_config config callback additional data 57 | # 58 | # $ ./script.sh 59 | # URL="http://example.com" [additional data] 60 | # semicolon=";" [additional data] 61 | cont_parser_simple_config() 62 | { 63 | local conf_var="$1" 64 | local callback="$2" 65 | shift 2 66 | 67 | while read line; do 68 | test -z "$line" && continue 69 | if [[ $line =~ ^([^[:space:]]+)[[:space:]]*=[[:space:]]*(.*)$ ]] 70 | then 71 | local k="${BASH_REMATCH[1]}" v="${BASH_REMATCH[2]}" 72 | cont_debug3 "calling callback with: $k = $v" 73 | "$callback" "$k" "$v" "$@" || { 74 | cont_error "$FUNCNAME: callback failed" 75 | return 1 76 | } 77 | else 78 | cont_warn "wrong config: $line" 79 | fi 80 | done < <(cont_semicolon_split "$conf_var") 81 | 82 | return 0 83 | } 84 | -------------------------------------------------------------------------------- /docker/vcf_combiner/usr/share/cont-volume/README: -------------------------------------------------------------------------------- 1 | This directory is supposed to be over-mounted by user.
2 | -------------------------------------------------------------------------------- /example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(NOT DISABLE_MPI) 2 | build_GenomicsDB_executable(test_genomicsdb_bcf_generator) 3 | build_GenomicsDB_executable(test_genomicsdb_importer) 4 | endif() 5 | -------------------------------------------------------------------------------- /example/java/TestGenomicsDBImporterWithMergedVCFHeader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | import com.intel.genomicsdb.exception.GenomicsDBException; 24 | import com.intel.genomicsdb.importer.GenomicsDBImporter; 25 | import com.intel.genomicsdb.model.CommandLineImportConfig; 26 | import org.json.simple.parser.ParseException; 27 | 28 | import java.io.IOException; 29 | 30 | public final class TestGenomicsDBImporterWithMergedVCFHeader { 31 | 32 | public static void main(final String[] args) throws IOException, GenomicsDBException, ParseException, InterruptedException { 33 | CommandLineImportConfig config = new CommandLineImportConfig("TestGenomicsDBImporterWithMergedVCFHeader", args); 34 | GenomicsDBImporter importer = new GenomicsDBImporter(config); 35 | importer.executeImport(); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /example/src/test_genomicsdb_bcf_generator.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include 24 | #include "headers.h" 25 | #include "genomicsdb_bcf_generator.h" 26 | #include 27 | 28 | int main(int argc, char *argv[]) { 29 | //Initialize MPI environment 30 | auto rc = MPI_Init(0, 0); 31 | if (rc != MPI_SUCCESS) { 32 | printf ("Error starting MPI program. Terminating.\n"); 33 | MPI_Abort(MPI_COMM_WORLD, rc); 34 | } 35 | //Get my world rank 36 | int my_world_mpi_rank = 0; 37 | MPI_Comm_rank(MPI_COMM_WORLD, &my_world_mpi_rank); 38 | // Define long options 39 | static struct option long_options[] = 40 | { 41 | {"page-size",1,0,'p'}, 42 | {"rank",1,0,'r'}, 43 | {"output-format",1,0,'O'}, 44 | {"json-config",1,0,'j'}, 45 | {"loader-json-config",1,0,'l'}, 46 | {0,0,0,0}, 47 | }; 48 | int c; 49 | uint64_t page_size = 0u; 50 | std::string output_format = ""; 51 | std::string json_config_file = ""; 52 | std::string loader_json_config_file = ""; 53 | while((c=getopt_long(argc, argv, "j:l:p:r:O:", long_options, NULL)) >= 0) 54 | { 55 | switch(c) 56 | { 57 | case 'p': 58 | page_size = strtoull(optarg, 0, 10); 59 | break; 60 | case 'r': 61 | my_world_mpi_rank = strtoull(optarg, 0 ,10); 62 | break; 63 | case 'O': 64 | output_format = std::move(std::string(optarg)); 65 | break; 66 | case 'j': 67 | json_config_file = std::move(std::string(optarg)); 68 | break; 69 | case 'l': 70 | loader_json_config_file = std::move(std::string(optarg)); 71 | break; 72 | default: 73 | std::cerr << "Unknown command line argument\n"; 74 | exit(-1); 75 | } 76 | } 77 | std::vector buffer(page_size > 0u ? 
page_size : 100u); 78 | //assert(json_config_file.length() > 0u && loader_json_config_file.length() > 0u); 79 | GenomicsDBBCFGenerator bcf_reader(loader_json_config_file, json_config_file, my_world_mpi_rank, page_size, std::max(page_size, 1024u), 80 | output_format.c_str()); 81 | while(!(bcf_reader.end())) 82 | { 83 | auto num_bytes_read = bcf_reader.read_and_advance(&(buffer[0]), 0u, buffer.size()); 84 | if(num_bytes_read > 0u) 85 | fwrite(&(buffer[0]), 1u, num_bytes_read, stdout); 86 | } 87 | MPI_Finalize(); 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pylint 3 | PyVCF 4 | jsondiff 5 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(resources) 2 | add_subdirectory(main) 3 | add_subdirectory(test) 4 | -------------------------------------------------------------------------------- /src/main/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(GenomicsDB_library_sources 2 | cpp/src/query_operations/variant_operations.cc 3 | cpp/src/query_operations/broad_combined_gvcf.cc 4 | cpp/src/genomicsdb/variant_cell.cc 5 | cpp/src/genomicsdb/variant_storage_manager.cc 6 | cpp/src/genomicsdb/variant_field_data.cc 7 | cpp/src/genomicsdb/variant_array_schema.cc 8 | cpp/src/genomicsdb/variant_field_handler.cc 9 | cpp/src/genomicsdb/variant.cc 10 | cpp/src/genomicsdb/query_variants.cc 11 | cpp/src/genomicsdb/genomicsdb_columnar_field.cc 12 | cpp/src/genomicsdb/genomicsdb_iterators.cc 13 | cpp/src/genomicsdb/genomicsdb_multid_vector_field.cc 14 | cpp/src/loader/tiledb_loader_text_file.cc 15 | cpp/src/loader/load_operators.cc 16 | cpp/src/loader/genomicsdb_importer.cc 17 | 
cpp/src/loader/tiledb_loader_file_base.cc 18 | cpp/src/loader/tiledb_loader.cc 19 | cpp/src/utils/command_line.cc 20 | cpp/src/utils/memory_measure.cc 21 | cpp/src/utils/histogram.cc 22 | cpp/src/utils/vid_mapper_pb.cc 23 | cpp/src/utils/lut.cc 24 | cpp/src/utils/known_field_info.cc 25 | cpp/src/utils/vid_mapper.cc 26 | cpp/src/utils/vid_mapper_sql.cc 27 | cpp/src/utils/timer.cc 28 | cpp/src/vcf/vcf_adapter.cc 29 | cpp/src/vcf/genomicsdb_bcf_generator.cc 30 | cpp/src/vcf/vcf2binary.cc 31 | cpp/src/config/variant_query_config.cc 32 | cpp/src/config/genomicsdb_config_base.cc 33 | cpp/src/config/json_config.cc 34 | ) 35 | 36 | include_directories(${PROTOBUF_GENERATED_CXX_HDRS_INCLUDE_DIRS}) 37 | set(GenomicsDB_library_sources 38 | ${GenomicsDB_library_sources} 39 | ${PROTOBUF_GENERATED_CXX_SRCS} 40 | ) 41 | 42 | if(PROTOBUF_REGENERATE) 43 | #Must be set here - see https://cmake.org/cmake/help/v3.3/command/set_source_files_properties.html 44 | set_source_files_properties(${PROTOBUF_GENERATED_CXX_SRCS} ${PROTOBUF_GENERATED_CXX_HDRS} PROPERTIES GENERATED True) 45 | endif() 46 | 47 | if(BUILD_JAVA) 48 | set(GenomicsDB_library_sources ${GenomicsDB_library_sources} 49 | jni/src/genomicsdb_GenomicsDBUtils.cc 50 | jni/src/genomicsdb_GenomicsDBImporter.cc 51 | jni/src/genomicsdb_GenomicsDBQueryStream.cc 52 | jni/src/genomicsdb_jni_init.cc 53 | ) 54 | endif() 55 | 56 | #Compile sources with PIC 57 | add_library(GenomicsDB_library_object_files OBJECT ${GenomicsDB_library_sources}) 58 | if(PROTOBUF_REGENERATE) 59 | add_dependencies(GenomicsDB_library_object_files PROTOBUF_GENERATED_CXX_TARGET) 60 | endif() 61 | set_property(TARGET GenomicsDB_library_object_files PROPERTY POSITION_INDEPENDENT_CODE ON) 62 | 63 | #Create the shared and static libraries 64 | add_library(genomicsdb STATIC $) 65 | #Link in other libraries 66 | add_library(tiledbgenomicsdb SHARED $) 67 | if(TILEDB_SOURCE_DIR) 68 | target_link_libraries(tiledbgenomicsdb tiledb_static) 69 | else() 70 | 
target_link_libraries(tiledbgenomicsdb ${TILEDB_LIBRARY}) 71 | endif() 72 | if(HTSLIB_SOURCE_DIR) 73 | add_dependencies(tiledbgenomicsdb htslib) 74 | endif() 75 | target_link_libraries(tiledbgenomicsdb ${HTSLIB_LIBRARY} ${GENOMICSDB_EXTERNAL_DEPENDENCIES_LIBRARIES}) 76 | if(LIBRT_LIBRARY) 77 | target_link_libraries(tiledbgenomicsdb ${LIBRT_LIBRARY}) 78 | endif() 79 | 80 | if(LIBCSV_FOUND) 81 | target_link_libraries(tiledbgenomicsdb ${LIBCSV_LIBRARY}) 82 | endif() 83 | #if(BUILD_DISTRIBUTABLE_LIBRARY) 84 | #target_link_libraries(tiledbgenomicsdb ${OPENSSL_LIBRARIES}) 85 | #endif() 86 | 87 | install(TARGETS genomicsdb tiledbgenomicsdb LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) 88 | -------------------------------------------------------------------------------- /src/main/cpp/include/config/json_config.h: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2018 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Configuration object that extends GenomicsDBConfigBase with a
 * rapidjson::Document member (m_json).
 *
 * Only the two constructors and get_rapidjson_doc() have bodies in this
 * header; every other member function is declared here and defined elsewhere.
 */
class JSONConfigBase : public GenomicsDBConfigBase
{
  public:
    // Default-constructs the GenomicsDBConfigBase sub-object.
    JSONConfigBase()
      : GenomicsDBConfigBase()
    {}
    // Copy-constructs the GenomicsDBConfigBase sub-object from x.
    // Not declared explicit, so a GenomicsDBConfigBase converts implicitly.
    JSONConfigBase(const GenomicsDBConfigBase& x)
      : GenomicsDBConfigBase(x)
    {}
    // NOTE(review): judging by the name and the ColumnRange& out-parameter,
    // this presumably converts the contig interval described by
    // curr_json_object into `result` using id_mapper - confirm in the .cc.
    static void extract_contig_interval_from_object(const rapidjson::Value& curr_json_object,
        const VidMapper* id_mapper, ColumnRange& result);
    // NOTE(review): presumably the same conversion for a protobuf-style JSON
    // struct, returning false when the object does not have that shape -
    // confirm in the .cc.
    static bool extract_interval_from_PB_struct_or_return_false(const rapidjson::Value& curr_json_object,
        const VidMapper* id_mapper,
        ColumnRange& result);
    // rank defaults to 0 when the caller omits it.
    void read_from_file(const std::string& filename, const int rank=0);
    void read_and_initialize_vid_and_callset_mapping_if_available(const int rank);
    // Read-only access to the stored JSON document.
    const rapidjson::Document& get_rapidjson_doc() const { return m_json; }
  protected:
    // JSON document exposed through get_rapidjson_doc().
    rapidjson::Document m_json;
};
std::string m="") : msg_("GenomicsDBJNIException : "+m) { ; } 9 | ~GenomicsDBJNIException() { ; } 10 | // ACCESSORS 11 | const char* what() const noexcept { return msg_.c_str(); } 12 | private: 13 | std::string msg_; 14 | }; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/main/cpp/include/utils/command_line.h: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/**
 * Bag of option values handed to parse_command_line() by reference.
 *
 * All members are public. The constructor puts every field into its
 * "not specified" state: flags false, C-string pointers null, counters zero,
 * the end position at a deliberately huge sentinel and the temp-space string
 * empty. The two file streams are default-constructed, i.e. not open.
 */
class CommandLineOpts
{
  public:
    // Initializers are listed in member declaration order.
    CommandLineOpts()
      : m_do_scan(false),
        m_is_input_csv_sorted(false),
        m_workspace(nullptr),
        m_csv_filename(nullptr),
        m_array_name(nullptr),
        m_output_fstream(),
        m_positions_list(),
        m_num_samples(0ull),
        m_position(0ull),
        m_end_position(600000000000ull), //600B - large number
        m_temp_space(""),
        m_test_C_pointers(false)
    {}

    bool m_do_scan;
    bool m_is_input_csv_sorted;
    // Non-owning C-string pointers; null until assigned.
    char* m_workspace;
    char* m_csv_filename;
    char* m_array_name;
    std::ofstream m_output_fstream;
    std::ifstream m_positions_list;
    uint64_t m_num_samples;
    uint64_t m_position;
    uint64_t m_end_position;
    std::string m_temp_space;
    bool m_test_C_pointers;
};
// Returns true when the allele string is non-empty and begins with '&'.
// An empty string is never treated as a NON_REF allele.
inline bool IS_NON_REF_ALLELE(const std::string& allele)
{
  if(allele.empty())
    return false;
  return allele.front() == '&';
}

// Single-character overload: true only for '&'.
inline bool IS_NON_REF_ALLELE(const char allele_char)
{
  return ('&' == allele_char);
}
// Record of seven unsigned long counters; it is the type of the `result`
// out-parameter of read_off_memory_status() below.
// NOTE(review): the field names match the columns of Linux /proc/<pid>/statm
// (size, resident, share, text, lib, data, dt) - confirm against the .cc.
typedef struct {
  unsigned long size,resident,share,text,lib,data,dt;
} statm_t;

// Fills `result`; page_size defaults to 4096 when the caller omits it.
// NOTE(review): presumably page_size scales page counts into bytes - the
// definition is not in this header, verify there.
void read_off_memory_status(statm_t& result, const size_t page_size=4096u);
21 | */ 22 | 23 | #ifndef PROFILING_H 24 | #define PROFILING_H 25 | 26 | #ifdef DO_PROFILING 27 | #include 28 | extern uint64_t g_num_disk_loads; 29 | extern uint64_t g_num_cached_loads; 30 | extern uint64_t g_coords_num_disk_loads; 31 | extern uint64_t g_coords_num_cached_loads; 32 | extern uint64_t g_total_num_tiles_loaded; 33 | extern std::vector g_num_tiles_loaded; 34 | extern std::vector g_num_segments_loaded; 35 | #endif 36 | 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/main/cpp/src/genomicsdb/variant_array_schema.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #include "variant_array_schema.h" 24 | #include "variant_field_data.h" 25 | 26 | #define VERIFY_OR_THROW(X) if(!(X)) throw VariantArraySchemaException(#X); 27 | 28 | VariantArraySchema::VariantArraySchema(const std::string& array_name, 29 | const std::vector& attribute_names, 30 | const std::vector& dim_names, 31 | const std::vector >& dim_domains, 32 | const std::vector& types, 33 | const std::vector& val_num, 34 | const std::vector compression, 35 | int cell_order) 36 | : m_dim_type(typeid(int64_t)) 37 | { 38 | m_array_name = array_name; 39 | m_cell_order = cell_order; 40 | VERIFY_OR_THROW(attribute_names.size() == val_num.size()); 41 | VERIFY_OR_THROW(attribute_names.size()+1u == types.size() && 42 | compression.size() == types.size() && 43 | "Last element of types and compression vectors must specify type and compression of co-ordinates"); 44 | VERIFY_OR_THROW(dim_names.size() == dim_domains.size()); 45 | m_attributes_vector.resize(attribute_names.size()); 46 | for(auto i=0u;i g_variant_field_type_index_to_tiledb_type = 31 | std::unordered_map{ 32 | { std::type_index(typeid(int)), TILEDB_INT32 }, 33 | { std::type_index(typeid(int64_t)), TILEDB_INT64 }, 34 | { std::type_index(typeid(unsigned)), TILEDB_INT32 }, 35 | { std::type_index(typeid(uint64_t)), TILEDB_INT64 }, 36 | { std::type_index(typeid(float)), TILEDB_FLOAT32 }, 37 | { std::type_index(typeid(double)), TILEDB_FLOAT64 }, 38 | { std::type_index(typeid(char)), TILEDB_CHAR }, 39 | { std::type_index(typeid(bool)), TILEDB_CHAR } 40 | }; 41 | 42 | std::vector g_tiledb_type_to_variant_field_type_index = 43 | { 44 | std::type_index(typeid(int)), 45 | std::type_index(typeid(int64_t)), 46 | std::type_index(typeid(float)), 47 | std::type_index(typeid(double)), 48 | std::type_index(typeid(char)) 49 | }; 50 | 51 | std::unordered_map g_variant_field_type_index_to_bcf_ht_type = 52 | std::unordered_map{ 53 | { std::type_index(typeid(void)), BCF_HT_VOID }, 54 | { std::type_index(typeid(bool)), 
BCF_HT_FLAG }, 55 | { std::type_index(typeid(int)), BCF_HT_INT }, 56 | { std::type_index(typeid(int64_t)), BCF_HT_INT64 }, 57 | { std::type_index(typeid(unsigned)), BCF_HT_UINT }, 58 | { std::type_index(typeid(uint64_t)), BCF_HT_UINT64 }, 59 | { std::type_index(typeid(float)), BCF_HT_REAL }, 60 | { std::type_index(typeid(double)), BCF_HT_DOUBLE }, 61 | { std::type_index(typeid(std::string)), BCF_HT_STR }, 62 | { std::type_index(typeid(char)), BCF_HT_CHAR } 63 | }; 64 | 65 | 66 | size_t VariantFieldTypeUtil::size(const int bcf_ht_type) 67 | { 68 | switch(bcf_ht_type) 69 | { 70 | case BCF_HT_INT: 71 | return sizeof(int); 72 | case BCF_HT_INT64: 73 | return sizeof(int64_t); 74 | case BCF_HT_UINT: 75 | return sizeof(unsigned); 76 | case BCF_HT_UINT64: 77 | return sizeof(uint64_t); 78 | case BCF_HT_REAL: 79 | return sizeof(float); 80 | case BCF_HT_DOUBLE: 81 | return sizeof(double); 82 | case BCF_HT_STR: 83 | case BCF_HT_CHAR: 84 | return sizeof(char); 85 | default: 86 | return 0; 87 | } 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /src/main/cpp/src/utils/command_line.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "command_line.h" 27 | 28 | enum ArgsIdxEnum 29 | { 30 | ARGS_IDX_IS_PRESORTED=1000, 31 | ARGS_IDX_TEST_C_POINTERS 32 | }; 33 | 34 | void parse_command_line(int argc, char** argv, CommandLineOpts& cl) 35 | { 36 | static struct option loptions[] = 37 | { 38 | {"array-name",1,0,'A'}, 39 | {"csv-file",1,0,'f'}, 40 | {"num-samples",1,0,'N'}, 41 | {"output",1,0,'o'}, 42 | {"position",1,0,'p'}, 43 | {"end-position",1,0,'e'}, 44 | {"position-list",1,0,'P'}, 45 | {"scan",0,0,'S'}, 46 | {"temp-space",1,0,'T'}, 47 | {"workspace",1,0,'w'}, 48 | {"presorted", 0, 0, ARGS_IDX_IS_PRESORTED}, 49 | {"test-c-pointers", 0, 0, ARGS_IDX_TEST_C_POINTERS}, 50 | {0,0,0,0} 51 | }; 52 | int c = 0; 53 | char* val; 54 | while ((c = getopt_long(argc, argv, "A:f:N:o:p:e:P:Sw:T:",loptions,NULL)) >= 0) { 55 | switch(c) 56 | { 57 | case 'w': 58 | cl.m_workspace = optarg; 59 | break; 60 | case 'f': 61 | cl.m_csv_filename = optarg; 62 | break; 63 | case 'A': 64 | cl.m_array_name = optarg; 65 | break; 66 | case 'N': //#samples 67 | cl.m_num_samples = strtoull(optarg, 0, 10); 68 | break; 69 | case 'o': 70 | val = optarg; 71 | cl.m_output_fstream.open(val); 72 | break; 73 | case 'p': //position 74 | cl.m_position = strtoull(optarg, 0, 10); 75 | break; 76 | case 'e': //end position 77 | cl.m_end_position = strtoull(optarg, 0, 10); 78 | break; 79 | case 'P': 80 | val = optarg; 81 | cl.m_positions_list.open(val); 82 | break; 83 | case 
/**
 * String constants for chromosome-interval folder names.
 * Not instantiable: the only constructor is private.
 */
public final class Constants {

    /** Literal character placed between the parts of an interval folder name. */
    public static final String CHROMOSOME_FOLDER_DELIMITER_SYMBOL = "$";

    /** The same delimiter escaped for use as a regular expression. */
    public static final String CHROMOSOME_FOLDER_DELIMITER_SYMBOL_REGEX = "\\$";

    /**
     * Format template with one string and two integer placeholders separated
     * by the delimiter; evaluates to {@code %s$%d$%d}.
     */
    public static final String CHROMOSOME_INTERVAL_FOLDER =
            String.format("%%s%s%%d%s%%d", CHROMOSOME_FOLDER_DELIMITER_SYMBOL, CHROMOSOME_FOLDER_DELIMITER_SYMBOL);

    private Constants() {
    }
}
and Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/**
 * Static convenience methods that forward to the native methods statically
 * imported from GenomicsDBUtilsJni. Each method is a single call; no
 * argument validation is performed on the Java side.
 */
public class GenomicsDBUtils {
    /**
     * Create TileDB workspace
     *
     * @param workspace path to workspace directory
     * @param replace   when set, the directory is deleted first if it exists
     * @return status 0 = workspace created,
     * -1 = path was not a directory,
     * -2 = failed to create workspace,
     * 1 = existing directory, nothing changed
     */
    public static int createTileDBWorkspace(final String workspace, final boolean replace) {
        return jniCreateTileDBWorkspace(workspace, replace);
    }

    /**
     * Write contents into file
     *
     * @param filename path to file
     * @param contents buffer to be written out; must not be null because
     *                 {@code contents.length()} is evaluated before the native call
     * @return status 0 = OK
     */
    public static int writeToFile(final String filename, final String contents) {
        // The length handed to the native side is String.length(), i.e. the number
        // of UTF-16 code units, not a byte count.
        // NOTE(review): if the native code treats it as a byte length, text with
        // non-ASCII characters may be cut short - confirm against the JNI implementation.
        return jniWriteToFile(filename, contents, (long)contents.length());
    }

    /**
     * Copy source path contents to destination
     * NOTE(review): the method and the native call are named "move" while this
     * description says "copy" - confirm whether the source is removed.
     *
     * @param source      local filesystem path
     * @param destination local or cloud filesystem URI
     * @return status 0 = OK
     */
    public static int moveFile(final String source, final String destination) {
        return jniMoveFile(source, destination);
    }

    /**
     * Consolidate TileDB array
     *
     * @param workspace path to workspace directory
     * @param arrayName array name
     */
    public static void consolidateTileDBArray(final String workspace, final String arrayName) {
        jniConsolidateTileDBArray(workspace, arrayName);
    }

    /**
     * Checks if GenomicsDB array exists.
     * Forwards to the native jniIsTileDBArray check.
     *
     * @param workspace workspace
     * @param arrayName arrayName
     * @return true if workspace with arrayName exists else return false
     */
    public static boolean isGenomicsDBArray(final String workspace, final String arrayName) {
        return jniIsTileDBArray(workspace, arrayName);
    }

    /**
     * List the GenomicsDB arrays in the given workspace
     * Forwards to the native jniListTileDBArrays call.
     *
     * @param workspace workspace
     * @return names of GenomicsDB arrays if they exist
     */
    public static String[] listGenomicsDBArrays(final String workspace) {
        return jniListTileDBArrays(workspace);
    }

}
/**
 * Declarations of the native utility entry points. The static initializer
 * loads the native library through GenomicsDBLibLoader the first time this
 * class is initialized.
 */
public class GenomicsDBUtilsJni {
    static {
        try {
            boolean loaded = GenomicsDBLibLoader.loadLibrary();
            // A false return is reported the same way as a link error.
            if (!loaded) throw new GenomicsDBException("Could not load genomicsdb native library");
        } catch (UnsatisfiedLinkError ule) {
            // Re-thrown as GenomicsDBException with the link error kept as the cause.
            throw new GenomicsDBException("Could not load genomicsdb native library", ule);
        }
    }

    /**
     * Create TileDB workspace
     *
     * @param workspace path to workspace directory
     * @param replace   if set existing directory is first deleted
     * @return status 0 = workspace created,
     * -1 = path was not a directory,
     * -2 = failed to create workspace,
     * 1 = existing directory, nothing changed
     */
    public static native int jniCreateTileDBWorkspace(final String workspace, final boolean replace);

    /**
     * Consolidate TileDB array
     *
     * @param workspace path to workspace directory
     * @param arrayName array name
     */
    public static native void jniConsolidateTileDBArray(final String workspace, final String arrayName);

    /**
     * Checks if GenomicsDB array exists.
     *
     * @param workspace workspace
     * @param arrayName array
     * @return true if workspace with arrayName exists else return false
     */
    public static native boolean jniIsTileDBArray(final String workspace, final String arrayName);

    /**
     * List Arrays in given workspace.
     *
     * @param workspace workspace to list
     * @return list of arrays in given workspace.
     */
    public static native String[] jniListTileDBArrays(final String workspace);

    /**
     * Write contents into file
     *
     * @param filename path to file, can be cloud URL
     * @param contents buffer to be written out
     * @param length   length of buffer to be written out
     *                 NOTE(review): the unit (bytes vs. Java chars) is decided by the
     *                 native implementation - confirm there.
     * @return status 0 = OK
     */
    public static native int jniWriteToFile(final String filename, final String contents, final long length);

    /**
     * Copy source path contents to destination
     * NOTE(review): named "move" but described as "copy" - confirm whether the
     * source is removed by the native implementation.
     *
     * @param source      path to source file, can be cloud URL
     * @param destination path to destination file, can be cloud URL
     * @return status 0 = OK
     */
    public static native int jniMoveFile(final String source, final String destination);

}
/**
 * Unchecked exception type thrown by classes in the genomicsdb package.
 * Both constructors delegate straight to {@link RuntimeException}.
 */
public class GenomicsDBException extends RuntimeException {

    /**
     * @param msg detail message
     */
    public GenomicsDBException(final String msg) {
        super(msg);
    }

    /**
     * @param msg       detail message
     * @param throwable underlying cause, retrievable through {@link #getCause()}
     */
    public GenomicsDBException(final String msg, final Throwable throwable) {
        super(msg, throwable);
    }
}
-------------------------------------------------------------------------------- 1 | /* 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | package com.intel.genomicsdb.importer.model; 24 | 25 | import htsjdk.samtools.util.Locatable; 26 | 27 | /** 28 | * Utility class to represent a chromosome interval 29 | * Contains 3 members - chr name, start, end (1-based) 30 | */ 31 | public class ChromosomeInterval implements Locatable { 32 | 33 | private String chromosomeName; 34 | private long begin; 35 | private long end; 36 | 37 | public ChromosomeInterval(final String name, final long begin, final long end) { 38 | this.chromosomeName = name; 39 | this.begin = begin; 40 | this.end = end; 41 | } 42 | 43 | @Override 44 | public String getContig() { 45 | return chromosomeName; 46 | } 47 | 48 | @Override 49 | public int getStart() { 50 | return (int) begin; 51 | } 52 | 53 | @Override 54 | public int getEnd() { 55 | return (int) end; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/intel/genomicsdb/importer/model/SampleInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Immutable pairing of a sample's globally unique name with its row index.
 */
public class SampleInfo {
    private final long rowIdx;
    private final String name;

    /**
     * @param name   globally unique sample name
     * @param rowIdx row index of the sample
     */
    public SampleInfo(final String name, final long rowIdx) {
        this.rowIdx = rowIdx;
        this.name = name;
    }

    /** @return the row index given at construction */
    public long getRowIdx() {
        return this.rowIdx;
    }

    /** @return the sample name given at construction */
    public String getName() {
        return this.name;
    }
}
IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | package com.intel.genomicsdb.model; 24 | 25 | public class BatchCompletionCallbackFunctionArgument { 26 | 27 | public int batchCount; 28 | public int totalBatchCount; 29 | 30 | public BatchCompletionCallbackFunctionArgument(final int batchCount, final int totalBatchCount) { 31 | this.batchCount = batchCount; 32 | this.totalBatchCount = totalBatchCount; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/intel/genomicsdb/reader/ChrArrayFolderComparator.java: -------------------------------------------------------------------------------- 1 | package com.intel.genomicsdb.reader; 2 | 3 | import java.util.Comparator; 4 | 5 | import static com.intel.genomicsdb.Constants.CHROMOSOME_FOLDER_DELIMITER_SYMBOL_REGEX; 6 | 7 | public class ChrArrayFolderComparator implements Comparator { 8 | @Override 9 | public int compare(String o1, String o2) { 10 | int chromCompare = extractChromsomeName(o1).compareTo(extractChromsomeName(o2)); 11 | if (chromCompare == 0) { 12 | return extractIntervalStart(o1) - extractIntervalStart(o2); 13 | } else { 14 | return chromCompare; 15 | } 16 | } 17 | 18 | private String extractChromsomeName(String s) { 19 | String[] values = s.split(CHROMOSOME_FOLDER_DELIMITER_SYMBOL_REGEX); 20 | return values[0]; 21 | } 22 | 23 | private int extractIntervalStart(String s) { 24 | String[] values = s.split(CHROMOSOME_FOLDER_DELIMITER_SYMBOL_REGEX); 25 | return Integer.parseInt(values[1]); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/intel/genomicsdb/reader/GenomicsDBFeatureReaderJni.java: 
/**
 * Native method declarations used by the reader package to inspect a
 * workspace.
 *
 * NOTE(review): JNI resolves native symbols from the fully qualified class
 * name; the generated header genomicsdb_GenomicsDBUtils.h exports functions
 * with the same method names under GenomicsDBUtilsJni - confirm a native
 * implementation is exported for this class as well.
 */
class GenomicsDBFeatureReaderJni {

    /**
     * List Arrays in given workspace.
     *
     * @param workspace workspace
     * @return list of arrays in given workspace.
     */
    public static native String[] jniListTileDBArrays(String workspace);

    /**
     * Checks if GenomicsDB array exists.
     *
     * @param workspace workspace
     * @param arrayName array name
     * @return true if workspace with arrayName exists else return false
     */
    public static native boolean jniIsTileDBArray(String workspace, String arrayName);
}
/**
 * Accumulates wall-clock and current-thread CPU time, in seconds, across
 * start()/stop() intervals.
 *
 * Construction starts the first interval. Every stop() adds the time elapsed
 * since the most recent start() to the running totals.
 */
public class GenomicsDBTimer {
    private static final double NANOS_PER_SECOND = 1e9;

    // Readings (nanoseconds) captured by the most recent start()
    private long mBeginWallClockTime = 0;
    private long mBeginCpuTime = 0;

    //Cumulative time
    private double mCumulativeWallClockTime = 0;
    private double mCumulativeCpuTime = 0;

    /** Creates the timer and immediately begins an interval. */
    public GenomicsDBTimer() {
        start();
    }

    /** Records the current wall-clock and thread CPU readings as the interval begin. */
    public void start() {
        mBeginWallClockTime = System.nanoTime();
        mBeginCpuTime = ManagementFactory.getThreadMXBean().getCurrentThreadCpuTime();
    }

    /** Adds the time elapsed since the last start() to the cumulative totals. */
    public void stop() {
        final long wallNow = System.nanoTime();
        final long cpuNow = ManagementFactory.getThreadMXBean().getCurrentThreadCpuTime();
        final long wallElapsedNanos = wallNow - mBeginWallClockTime;
        final long cpuElapsedNanos = cpuNow - mBeginCpuTime;
        mCumulativeWallClockTime += wallElapsedNanos / NANOS_PER_SECOND;
        mCumulativeCpuTime += cpuElapsedNanos / NANOS_PER_SECOND;
    }

    /**
     * Writes one comma-separated line with the cumulative totals.
     *
     * @param prefix label emitted after the leading tag; skipped when empty
     * @param fptr   stream that receives the line
     */
    public void print(final String prefix, PrintStream fptr) {
        fptr.print("GENOMICSDB_TIMER,");
        if (prefix.length() > 0) {
            fptr.print(prefix);
            fptr.print(',');
        }
        final String wallPart = "Wall-clock time(s)," + mCumulativeWallClockTime;
        final String cpuPart = ",Cpu time(s)," + mCumulativeCpuTime;
        fptr.println(wallPart + cpuPart);
    }
}
granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | package com.intel.genomicsdb.spark; 24 | 25 | import com.intel.genomicsdb.spark.GenomicsDBConfiguration; 26 | import com.intel.genomicsdb.spark.GenomicsDBInputFormat; 27 | import htsjdk.variant.variantcontext.VariantContext; 28 | import org.apache.hadoop.conf.Configuration; 29 | import org.apache.spark.SparkConf; 30 | import org.apache.spark.api.java.JavaPairRDD; 31 | import org.apache.spark.api.java.JavaRDD; 32 | import org.apache.spark.api.java.JavaSparkContext; 33 | 34 | import java.util.List; 35 | 36 | /** 37 | * This factory class exposes how a JavaRDD of variant contexts (htsjdk) 38 | * can be retrieved from GenomicsDB. In case of the newAPIHadoopRDD(), GenomicsDB 39 | * returns a JavaPairRDD where the genomics positions are the key. 
However, this 40 | * is seldom used in the variant contexts as downstream applications in HellBender 41 | * code uses only the values and ignores the key 42 | */ 43 | public final class GenomicsDBJavaSparkFactory { 44 | 45 | public static void usingNewAPIHadoopRDD(String[] args) { 46 | 47 | String loaderJsonFile = args[0]; 48 | String queryJsonFile = args[1]; 49 | String hostfile = args[2]; 50 | 51 | SparkConf conf = new SparkConf(); 52 | conf.setAppName("GenomicsDBTest using newAPIHadoopRDD"); 53 | JavaSparkContext sc = new JavaSparkContext(conf); 54 | 55 | Configuration hadoopConf = sc.hadoopConfiguration(); 56 | hadoopConf.set(GenomicsDBConfiguration.LOADERJSON, loaderJsonFile); 57 | hadoopConf.set(GenomicsDBConfiguration.QUERYJSON, queryJsonFile); 58 | hadoopConf.set(GenomicsDBConfiguration.MPIHOSTFILE, hostfile); 59 | 60 | JavaPairRDD variants; 61 | Class gformatClazz = GenomicsDBInputFormat.class; 62 | variants = sc.newAPIHadoopRDD(hadoopConf, gformatClazz, String.class, VariantContext.class); 63 | 64 | System.out.println("Number of variants "+variants.count()); 65 | List variantList = variants.collect(); 66 | for (Object variantObj : variantList) { 67 | System.out.println(variantObj); 68 | } 69 | } 70 | 71 | public static void main(String[] args) { 72 | usingNewAPIHadoopRDD(args); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/intel/genomicsdb/spark/GenomicsDBQueryInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 University of California, Los Angeles and Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | package com.intel.genomicsdb.spark; 24 | 25 | import java.io.IOException; 26 | import java.io.Serializable; 27 | 28 | /** 29 | * Maintain global information on query ranges which is used 30 | * to create GenomicsDBInputSplits 31 | */ 32 | class GenomicsDBQueryInfo implements Serializable { 33 | 34 | private long beginPosition; 35 | private long endPosition; 36 | 37 | public GenomicsDBQueryInfo(long start, long end) { 38 | beginPosition = start; 39 | endPosition = end; 40 | } 41 | 42 | public GenomicsDBQueryInfo(GenomicsDBQueryInfo copy) { 43 | this(copy.getBeginPosition(), copy.getEndPosition()); 44 | } 45 | 46 | public long getBeginPosition() { 47 | return beginPosition; 48 | } 49 | 50 | public long getEndPosition() { 51 | return endPosition; 52 | } 53 | 54 | public boolean equals(Object obj) { 55 | if (obj == this) { 56 | return true; 57 | } 58 | if (obj == null) { 59 | return false; 60 | } 61 | if (obj instanceof GenomicsDBQueryInfo) { 62 | GenomicsDBQueryInfo q = (GenomicsDBQueryInfo) obj; 63 | return q.getBeginPosition()==getBeginPosition() && 64 | q.getEndPosition()==getEndPosition(); 65 | } 66 | else { 67 | return false; 68 | } 69 | } 
70 | 71 | public int hashCode() { 72 | return ((int)(getBeginPosition() + getEndPosition())); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/intel/genomicsdb/spark/GenomicsDBRecordReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | package com.intel.genomicsdb.spark; 24 | 25 | import com.intel.genomicsdb.reader.GenomicsDBFeatureReader; 26 | import htsjdk.tribble.CloseableTribbleIterator; 27 | import htsjdk.tribble.Feature; 28 | import org.apache.hadoop.classification.InterfaceAudience; 29 | import org.apache.hadoop.classification.InterfaceStability; 30 | import org.apache.hadoop.mapreduce.InputSplit; 31 | import org.apache.hadoop.mapreduce.RecordReader; 32 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 33 | 34 | import java.io.IOException; 35 | 36 | @InterfaceAudience.Public 37 | @InterfaceStability.Stable 38 | public class GenomicsDBRecordReader 39 | extends RecordReader { 40 | 41 | private final GenomicsDBFeatureReader featureReader; 42 | private CloseableTribbleIterator iterator; 43 | private VCONTEXT currentVariant; 44 | private long currentKey; 45 | 46 | GenomicsDBRecordReader(GenomicsDBFeatureReader featureReader) { 47 | this.featureReader = featureReader; 48 | this.currentKey = -1; 49 | } 50 | 51 | public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) 52 | throws IOException, InterruptedException { 53 | initialize(); 54 | } 55 | 56 | private void initialize() throws IOException { 57 | this.iterator = featureReader.iterator(); 58 | } 59 | 60 | public boolean nextKeyValue() throws IOException, InterruptedException { 61 | if (this.iterator.hasNext()) { 62 | this.currentVariant = iterator.next(); 63 | this.currentKey++; 64 | return true; 65 | } else return false; 66 | } 67 | 68 | @Override 69 | public String getCurrentKey() throws IOException, InterruptedException { 70 | return Long.toString(currentKey); 71 | } 72 | 73 | public VCONTEXT getCurrentValue() throws IOException, InterruptedException { 74 | return currentVariant; 75 | } 76 | 77 | public float getProgress() throws IOException, InterruptedException { 78 | return 0; 79 | } 80 | 81 | public void close() throws IOException { 82 | this.iterator.close(); 83 | } 84 | 85 | public 
Boolean hasNext() { 86 | return this.iterator.hasNext(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/jni/include/genomicsdb_GenomicsDBImporter.h: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | /* Header for class com_intel_genomicsdb_importer_GenomicsDBImporterJni */ 4 | 5 | #ifndef _Included_com_intel_genomicsdb_importer_GenomicsDBImporterJni 6 | #define _Included_com_intel_genomicsdb_importer_GenomicsDBImporterJni 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* 11 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 12 | * Method: jniGetChromosomeIntervalsForColumnPartition 13 | * Signature: (Ljava/lang/String;I)Ljava/lang/String; 14 | */ 15 | JNIEXPORT jstring JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniGetChromosomeIntervalsForColumnPartition 16 | (JNIEnv *, jclass, jstring, jint); 17 | 18 | /* 19 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 20 | * Method: jniConsolidateTileDBArray 21 | * Signature: (Ljava/lang/String;Ljava/lang/String;)V 22 | */ 23 | JNIEXPORT void JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniConsolidateTileDBArray 24 | (JNIEnv *, jclass, jstring, jstring); 25 | 26 | /* 27 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 28 | * Method: jniGenomicsDBImporter 29 | * Signature: (Ljava/lang/String;I)I 30 | */ 31 | JNIEXPORT jint JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniGenomicsDBImporter 32 | (JNIEnv *, jobject, jstring, jint); 33 | 34 | /* 35 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 36 | * Method: jniInitializeGenomicsDBImporterObject 37 | * Signature: (Ljava/lang/String;I)J 38 | */ 39 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniInitializeGenomicsDBImporterObject 40 | (JNIEnv *, jobject, 
jstring, jint); 41 | 42 | /* 43 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 44 | * Method: jniCopyVidMap 45 | * Signature: (J[B)J 46 | */ 47 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniCopyVidMap 48 | (JNIEnv *, jobject, jlong, jbyteArray); 49 | 50 | /* 51 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 52 | * Method: jniCopyCallsetMap 53 | * Signature: (J[B)J 54 | */ 55 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniCopyCallsetMap 56 | (JNIEnv *, jobject, jlong, jbyteArray); 57 | 58 | /* 59 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 60 | * Method: jniAddBufferStream 61 | * Signature: (JLjava/lang/String;ZJ[BJ)V 62 | */ 63 | JNIEXPORT void JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniAddBufferStream 64 | (JNIEnv *, jobject, jlong, jstring, jboolean, jlong, jbyteArray, jlong); 65 | 66 | /* 67 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 68 | * Method: jniSetupGenomicsDBLoader 69 | * Signature: (JLjava/lang/String;)J 70 | */ 71 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniSetupGenomicsDBLoader 72 | (JNIEnv *, jobject, jlong, jstring); 73 | 74 | /* 75 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 76 | * Method: jniWriteDataToBufferStream 77 | * Signature: (JII[BJ)V 78 | */ 79 | JNIEXPORT void JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniWriteDataToBufferStream 80 | (JNIEnv *, jobject, jlong, jint, jint, jbyteArray, jlong); 81 | 82 | /* 83 | * Class: com_intel_genomicsdb_importer_GenomicsDBImporterJni 84 | * Method: jniImportBatch 85 | * Signature: (J[J)Z 86 | */ 87 | JNIEXPORT jboolean JNICALL Java_com_intel_genomicsdb_importer_GenomicsDBImporterJni_jniImportBatch 88 | (JNIEnv *, jobject, jlong, jlongArray); 89 | 90 | #ifdef __cplusplus 91 | } 92 | #endif 93 | #endif 94 | 
-------------------------------------------------------------------------------- /src/main/jni/include/genomicsdb_GenomicsDBLibLoader.h: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | /* Header for class com_intel_genomicsdb_GenomicsDBLibLoader */ 4 | 5 | #ifndef _Included_com_intel_genomicsdb_GenomicsDBLibLoader 6 | #define _Included_com_intel_genomicsdb_GenomicsDBLibLoader 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* 11 | * Class: com_intel_genomicsdb_GenomicsDBLibLoader 12 | * Method: jniGenomicsDBOneTimeInitialize 13 | * Signature: ()I 14 | */ 15 | JNIEXPORT jint JNICALL Java_com_intel_genomicsdb_GenomicsDBLibLoader_jniGenomicsDBOneTimeInitialize 16 | (JNIEnv *, jclass); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | #endif 22 | -------------------------------------------------------------------------------- /src/main/jni/include/genomicsdb_GenomicsDBQueryStream.h: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | /* Header for class com_intel_genomicsdb_reader_GenomicsDBQueryStream */ 4 | 5 | #ifndef _Included_com_intel_genomicsdb_reader_GenomicsDBQueryStream 6 | #define _Included_com_intel_genomicsdb_reader_GenomicsDBQueryStream 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | #undef com_intel_genomicsdb_reader_GenomicsDBQueryStream_MAX_SKIP_BUFFER_SIZE 11 | #define com_intel_genomicsdb_reader_GenomicsDBQueryStream_MAX_SKIP_BUFFER_SIZE 2048L 12 | /* 13 | * Class: com_intel_genomicsdb_reader_GenomicsDBQueryStream 14 | * Method: jniGenomicsDBInit 15 | * Signature: (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IIIJJ)J 16 | */ 17 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_reader_GenomicsDBQueryStream_jniGenomicsDBInit 18 | (JNIEnv *, jobject, jstring, jstring, jstring, jint, jint, jint, jlong, jlong, 
jboolean, jboolean, jboolean, jboolean); 19 | 20 | /* 21 | * Class: com_intel_genomicsdb_reader_GenomicsDBQueryStream 22 | * Method: jniGenomicsDBClose 23 | * Signature: (J)J 24 | */ 25 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_reader_GenomicsDBQueryStream_jniGenomicsDBClose 26 | (JNIEnv *, jobject, jlong); 27 | 28 | /* 29 | * Class: com_intel_genomicsdb_reader_GenomicsDBQueryStream 30 | * Method: jniGenomicsDBGetNumBytesAvailable 31 | * Signature: (J)J 32 | */ 33 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_reader_GenomicsDBQueryStream_jniGenomicsDBGetNumBytesAvailable 34 | (JNIEnv *, jobject, jlong); 35 | 36 | /* 37 | * Class: com_intel_genomicsdb_reader_GenomicsDBQueryStream 38 | * Method: jniGenomicsDBReadNextByte 39 | * Signature: (J)B 40 | */ 41 | JNIEXPORT jbyte JNICALL Java_com_intel_genomicsdb_reader_GenomicsDBQueryStream_jniGenomicsDBReadNextByte 42 | (JNIEnv *, jobject, jlong); 43 | 44 | /* 45 | * Class: com_intel_genomicsdb_reader_GenomicsDBQueryStream 46 | * Method: jniGenomicsDBRead 47 | * Signature: (J[BII)I 48 | */ 49 | JNIEXPORT jint JNICALL Java_com_intel_genomicsdb_reader_GenomicsDBQueryStream_jniGenomicsDBRead 50 | (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); 51 | 52 | /* 53 | * Class: com_intel_genomicsdb_reader_GenomicsDBQueryStream 54 | * Method: jniGenomicsDBSkip 55 | * Signature: (JJ)J 56 | */ 57 | JNIEXPORT jlong JNICALL Java_com_intel_genomicsdb_reader_GenomicsDBQueryStream_jniGenomicsDBSkip 58 | (JNIEnv *, jobject, jlong, jlong); 59 | 60 | #ifdef __cplusplus 61 | } 62 | #endif 63 | #endif 64 | -------------------------------------------------------------------------------- /src/main/jni/include/genomicsdb_GenomicsDBUtils.h: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | /* Header for class com_intel_genomicsdb_GenomicsDBUtilsJni */ 4 | 5 | #ifndef _Included_com_intel_genomicsdb_GenomicsDBUtilsJni 6 | 
#define _Included_com_intel_genomicsdb_GenomicsDBUtilsJni 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* 11 | * Class: com_intel_genomicsdb_GenomicsDBUtilsJni 12 | * Method: jniCreateTileDBWorkspace 13 | * Signature: (Ljava/lang/String;Z)I 14 | */ 15 | JNIEXPORT jint JNICALL Java_com_intel_genomicsdb_GenomicsDBUtilsJni_jniCreateTileDBWorkspace 16 | (JNIEnv *, jclass, jstring, jboolean); 17 | 18 | /* 19 | * Class: com_intel_genomicsdb_GenomicsDBUtilsJni 20 | * Method: jniIsTileDBArray 21 | * Signature: (Ljava/lang/String;Ljava/lang/String;)Z 22 | */ 23 | JNIEXPORT jboolean JNICALL Java_com_intel_genomicsdb_GenomicsDBUtilsJni_jniIsTileDBArray 24 | (JNIEnv *, jclass, jstring, jstring); 25 | 26 | /* 27 | * Class: com_intel_genomicsdb_GenomicsDBUtilsJni 28 | * Method: jniListTileDBArrays 29 | * Signature: (Ljava/lang/String;)[Ljava/lang/String; 30 | */ 31 | JNIEXPORT jobjectArray JNICALL Java_com_intel_genomicsdb_GenomicsDBUtilsJni_jniListTileDBArrays 32 | (JNIEnv *, jclass, jstring); 33 | 34 | /* 35 | * Class: com_intel_genomicsdb_GenomicsDBUtilsJni 36 | * Method: jniWriteToFile 37 | * Signature: (Ljava/lang/String;Ljava/lang/String;J)I 38 | */ 39 | JNIEXPORT jint JNICALL Java_com_intel_genomicsdb_GenomicsDBUtilsJni_jniWriteToFile 40 | (JNIEnv *, jclass, jstring, jstring, jlong); 41 | 42 | /* 43 | * Class: com_intel_genomicsdb_GenomicsDBUtilsJni 44 | * Method: jniMoveFile 45 | * Signature: (Ljava/lang/String;Ljava/lang/String;)I 46 | */ 47 | JNIEXPORT jint JNICALL Java_com_intel_genomicsdb_GenomicsDBUtilsJni_jniMoveFile 48 | (JNIEnv *, jclass, jstring, jstring); 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | #endif 54 | -------------------------------------------------------------------------------- /src/main/jni/include/jni_mpi_init.h: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of 
charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef JNI_MPI_INIT_H 24 | #define JNI_MPI_INIT_H 25 | 26 | #include "headers.h" 27 | #ifndef DISABLE_MPI 28 | #include 29 | #endif 30 | 31 | class JNIMpiInit 32 | { 33 | public: 34 | JNIMpiInit() 35 | { 36 | m_my_mpi_rank = 0; 37 | m_mpi_initialized = false; 38 | } 39 | void initialize() 40 | { 41 | if(!m_mpi_initialized) 42 | { 43 | #ifndef DISABLE_MPI 44 | 45 | //Initialize MPI environment 46 | auto rc = MPI_Init(0, 0); 47 | if (rc != MPI_SUCCESS) 48 | printf("WARNING: MPI_Init() failed - cannot obtain MPI rank\n"); 49 | else 50 | { 51 | //Get my world rank 52 | MPI_Comm_rank(MPI_COMM_WORLD, &m_my_mpi_rank); 53 | m_mpi_initialized = true; 54 | } 55 | #endif 56 | } 57 | } 58 | ~JNIMpiInit() 59 | { 60 | #ifndef DISABLE_MPI 61 | if(m_mpi_initialized) 62 | MPI_Finalize(); 63 | #endif 64 | m_mpi_initialized = false; 65 | } 66 | int get_mpi_rank() const { return m_my_mpi_rank; } 67 | int get_mpi_rank(const int supplied_rank) const 68 | { 69 | return (supplied_rank != 0 || !m_mpi_initialized) ? 
supplied_rank : m_my_mpi_rank; 70 | } 71 | private: 72 | int m_my_mpi_rank; 73 | bool m_mpi_initialized; 74 | }; 75 | 76 | extern JNIMpiInit g_jni_mpi_init; 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /src/main/jni/src/genomicsdb_jni_init.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #include "jni_mpi_init.h" 24 | #include "genomicsdb_GenomicsDBLibLoader.h" 25 | 26 | JNIMpiInit g_jni_mpi_init; 27 | 28 | JNIEXPORT jint JNICALL Java_com_intel_genomicsdb_GenomicsDBLibLoader_jniGenomicsDBOneTimeInitialize 29 | (JNIEnv * env, jclass obj) 30 | { 31 | g_jni_mpi_init.initialize(); 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/com/intel/genomicsdb/GenomicsDBContext.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
package com.intel.genomicsdb

import htsjdk.tribble.readers.PositionalBufferedStream
import htsjdk.variant.variantcontext.VariantContext
import org.apache.hadoop.conf.Configuration
import org.apache.log4j.Logger
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * GenomicsDB context
  *
  * @param conf         Hadoop configuration including GenomicsDB
  *                     keys. Copy these keys in case we have to
  *                     create a new SparkContext
  * @param sparkContext Spark context to use. When null, a new context is
  *                     created; its master is taken from the "spark.master"
  *                     key of `conf` when that key is present.
  */
class GenomicsDBContext(val conf: Configuration, var sparkContext: SparkContext = null) {

  val logger = Logger.getLogger(classOf[GenomicsDBContext])

  if (sparkContext == null) {
    // Use a distinct name for the Spark configuration: naming it `conf`
    // would shadow the Hadoop configuration passed to the constructor and
    // make the "spark.master" lookup read the empty SparkConf instead.
    val sparkConf = new SparkConf()
    // Hadoop's Configuration.get returns null for a missing key; in that
    // case leave the master to SparkConf's own defaults (system properties).
    Option(conf.get("spark.master")).foreach(master => sparkConf.setMaster(master))
    sparkConf.setAppName("GenomicsDBTest")

    sparkContext = new SparkContext(sparkConf)
    logger.info("new Spark Context created from GenomicsDB Context")
  } else {
    logger.info("Creating GenomicsDB Context from Spark Context")
  }

  /**
    * Creates an RDD of variant contexts backed by this GenomicsDB context.
    *
    * @return a new GenomicsDBRDD constructed with this context and the value 1
    */
  def getVariantContexts: RDD[VariantContext] = {
    new GenomicsDBRDD[VariantContext, PositionalBufferedStream](this, 1)
  }
}
package com.intel.genomicsdb

import com.intel.genomicsdb.spark.GenomicsDBInputSplit
import htsjdk.tribble.Feature
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.spark.{Partition, SerializableWritable}

/**
  * Container for a single RDD partition of the underlying variant data.
  * Each partition wraps exactly one GenomicsDB input split.
  *
  * @param rddId    unique rdd identifier
  * @param index    index for task scheduler
  * @param rawSplit GenomicsDB input split passed from RDD getPartitions
  *                 [[GenomicsDBInputSplit]]
  */
private[genomicsdb] class GenomicsDBPartition[VCONTEXT <: Feature, SOURCE](
    rddId: Int,
    val index: Int,
    rawSplit: InputSplit with Writable)
  extends Partition {

  /** The input split wrapped in a SerializableWritable. */
  val serializableSplit = new SerializableWritable(rawSplit)

  /** Hash derived from the RDD id and the partition index. */
  override def hashCode: Int = {
    val seeded = 31 + rddId
    seeded * 31 + index
  }

  /** Delegates to the inherited equality. */
  override def equals(other: Any): Boolean = {
    super.equals(other)
  }
}

/**
  * Partitioning strategy can be either row-major or column-major.
  * The value is specified in the partitioning schema in the loader JSON
  * file.
  */
object GenomicsDBPartitionStrategy extends Enumeration {
  val ROW_MAJOR = Value
  val COL_MAJOR = Value
}
${GENOMICSDB_MAVEN_BUILD_DIR}/protobuf/com/intel/genomicsdb/GenomicsDBExportConfiguration.java 17 | ${GENOMICSDB_MAVEN_BUILD_DIR}/protobuf/com/intel/genomicsdb/GenomicsDBImportConfiguration.java 18 | ${GENOMICSDB_MAVEN_BUILD_DIR}/protobuf/com/intel/genomicsdb/GenomicsDBVidMapProto.java 19 | ) 20 | else() 21 | set(PROTOBUF_GENERATED_CXX_HDRS_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/src/main/protobuf-generated/cpp/include") 22 | set(PROTOBUF_GENERATED_CXX_SRCS "") 23 | set(PROTOBUF_GENERATED_CXX_HDRS "") 24 | foreach(PROTO_FILE ${PROTOBUF_PROTO_FILES}) 25 | get_filename_component(CURR_FILENAME ${PROTO_FILE} NAME_WE) 26 | list(APPEND PROTOBUF_GENERATED_CXX_SRCS "protobuf-generated/cpp/src/${CURR_FILENAME}.pb.cc") 27 | list(APPEND PROTOBUF_GENERATED_CXX_HDRS "${PROTOBUF_GENERATED_CXX_HDRS_INCLUDE_DIRS}/${CURR_FILENAME}.pb.h") 28 | endforeach() 29 | set(PROTOBUF_GENERATED_JAVA_SRCS 30 | src/main/protobuf-generated/java/com/intel/genomicsdb/GenomicsDBCallsetsMapProto.java 31 | src/main/protobuf-generated/java/com/intel/genomicsdb/GenomicsDBExportConfiguration.java 32 | src/main/protobuf-generated/java/com/intel/genomicsdb/GenomicsDBImportConfiguration.java 33 | src/main/protobuf-generated/java/com/intel/genomicsdb/GenomicsDBVidMapProto.java 34 | ) 35 | endif() 36 | set(PROTOBUF_GENERATED_CXX_SRCS ${PROTOBUF_GENERATED_CXX_SRCS} CACHE INTERNAL "Protocol buffers generated C++ sources") 37 | set(PROTOBUF_GENERATED_CXX_HDRS ${PROTOBUF_GENERATED_CXX_HDRS} CACHE INTERNAL "Protocol buffers generated C++ headers") 38 | set(PROTOBUF_GENERATED_CXX_HDRS_INCLUDE_DIRS ${PROTOBUF_GENERATED_CXX_HDRS_INCLUDE_DIRS} CACHE INTERNAL "Directory containing Protocol buffers generated C++ headers") 39 | set(PROTOBUF_GENERATED_JAVA_SRCS ${PROTOBUF_GENERATED_JAVA_SRCS} CACHE INTERNAL "Protocol buffers generated Java sources") 40 | -------------------------------------------------------------------------------- /src/resources/genomicsdb-spark-submit.sh: 
#!/bin/bash
#
# Submits the GenomicsDB Spark factory class to a Spark cluster in client
# deploy mode, using the jar-with-dependencies located next to this script.
#
# Usage: genomicsdb-spark-submit.sh <spark-master-url> [application args...]
#
# Requires SPARK_HOME to point at a Spark installation.
# Exits with status 1 when SPARK_HOME is unset or the master URL is missing;
# otherwise the exit status is that of spark-submit.

if [ -z "$SPARK_HOME" ]; then
  echo "SPARK_HOME not set" >&2
  # A bare `exit` would report success (status of the preceding echo)
  exit 1
fi

if [ "$#" -lt 1 ]; then
  echo "Usage: $0 <spark-master-url> [application args...]" >&2
  exit 1
fi

SPARK_MASTER_URL="$1"
shift

CLASS=com.intel.genomicsdb.spark.GenomicsDBJavaSparkFactory
GENOMICSDB_JAR=genomicsdb-0.4.0-jar-with-dependencies.jar
# Directory containing this script; quoted so paths with spaces survive
GENOMICSDB_BIN_DIR="$(cd "$(dirname "$0")" && pwd)"
GENOMICSDB_JAR_PATH="$GENOMICSDB_BIN_DIR/$GENOMICSDB_JAR"

CLASSPATH="$CLASSPATH:$GENOMICSDB_JAR_PATH"

"$SPARK_HOME/bin/spark-submit" \
  --class "$CLASS" \
  --master "$SPARK_MASTER_URL" \
  --deploy-mode client \
  "$GENOMICSDB_JAR_PATH" \
  "$@"
/**
 * Note: Variable names are kept aligned with
 * GenomicsDB JSON configuration.
 *
 * To build, use protocol buffer version >3.2.0 and the following command from base directory:
 * $ protoc -Isrc/resources/ --java_out=src/main/java/ \
 *     src/resources/genomicsdb_callsets_mapping.proto
 */

syntax = "proto2";

option java_package = "com.intel.genomicsdb.model";
option java_outer_classname = "GenomicsDBCallsetsMapProto";

// One callset entry: a sample name together with its row index and its
// index within the source file. stream_name is the only optional field.
message SampleIDToTileDBIDMap {
  required string sample_name = 1;
  required int64 row_idx = 2;
  required int64 idx_in_file = 3;
  optional string stream_name = 4;
}

// The full callset mapping: a list of SampleIDToTileDBIDMap entries.
message CallsetMappingPB {
  repeated SampleIDToTileDBIDMap callsets = 1;
}
syntax = "proto2";

option java_package = "com.intel.genomicsdb.model";
option java_outer_classname = "Coordinates";

// A single position expressed as contig name plus position on that contig.
message ContigPosition {
  required string contig = 1;
  required int64 position = 2;
}

// A single column, given either as a raw TileDB column value or as a
// contig position. At most one of the two alternatives is set.
message GenomicsDBColumn {
  oneof column {
    int64 tiledb_column = 1;
    ContigPosition contig_position = 2;
  }
}

// An interval expressed with raw begin/end column values.
message TileDBColumnInterval {
  required int64 begin = 1;
  required int64 end = 2;
}

// An interval expressed as a contig name with begin/end values.
message ContigInterval {
  required string contig = 1;
  required int64 begin = 2;
  required int64 end = 3;
}

// An interval, given either in TileDB column form or in contig form.
// At most one of the two alternatives is set.
message GenomicsDBColumnInterval {
  oneof interval {
    TileDBColumnInterval column_interval = 1;
    ContigInterval contig_interval = 2;
  }
}

// Either a single column or a column interval.
// At most one of the two alternatives is set.
message GenomicsDBColumnOrInterval {
  oneof column_or_interval {
    GenomicsDBColumn column = 1;
    GenomicsDBColumnInterval column_interval = 2;
  }
}
/**
 * To build, use protocol buffer version >3.2.0 and the following command from base directory:
 * $ protoc -Isrc/resources/ --java_out=src/main/java/ src/resources/genomicsdb_export_config.proto
 */

syntax = "proto2";

import "genomicsdb_coordinates.proto";
import "genomicsdb_vid_mapping.proto";
import "genomicsdb_callsets_mapping.proto";

option java_package = "com.intel.genomicsdb.model";
option java_outer_classname = "GenomicsDBExportConfiguration";

// A list of columns and/or column intervals.
message GenomicsDBColumnOrIntervalList {
  repeated GenomicsDBColumnOrInterval column_or_interval_list = 1;
}

// A range of rows described by its low and high values.
message RowRange {
  required int64 low = 1;
  required int64 high = 2;
}

// A list of row ranges.
message RowRangeList {
  repeated RowRange range_list = 1;
}

// Export/query configuration. Field names mirror the GenomicsDB JSON
// configuration keys, so they must not be renamed.
message ExportConfiguration {
  required string workspace = 1;

  // The array is identified either by an explicit name or by asking for the
  // name to be generated from the partition bounds.
  oneof array {
    string array_name = 2;
    bool generate_array_name_from_partition_bounds = 18;
  }

  required string reference_genome = 3;
  repeated GenomicsDBColumnOrIntervalList query_column_ranges = 4;
  repeated RowRangeList query_row_ranges = 5;
  repeated string attributes = 6;
  optional string vcf_header_filename = 7;
  optional string vcf_output_filename = 8;
  optional string vcf_output_format = 9;

  // The vid mapping is supplied either as a file name or inline.
  oneof vid_mapping_info {
    string vid_mapping_file = 10;
    VidMappingPB vid_mapping = 19;
  }

  // The callset mapping is supplied either as a file name or inline.
  oneof callset_mapping_info {
    string callset_mapping_file = 11;
    CallsetMappingPB callset_mapping = 20;
  }

  optional uint32 max_diploid_alt_alleles_that_can_be_genotyped = 12;
  optional bool index_output_VCF = 13;
  optional bool produce_GT_field = 14;
  optional bool produce_FILTER_field = 15;
  optional bool sites_only_query = 16;
  optional bool produce_GT_with_min_PL_value_for_spanning_deletions = 17;
  optional bool scan_full = 21;
}
/**
 * Note: Variable names are kept aligned with the
 * GenomicsDB JSON configuration. If you decide to
 * change them back to camel case, the docs must be
 * updated to be consistent.
 *
 * To build, use protocol buffer version >3.2.0 and the following command from base directory:
 * $ protoc -Isrc/resources/ --java_out=src/main/java/ src/resources/genomicsdb_import_config.proto
 */

syntax = "proto2";

option java_package = "com.intel.genomicsdb.model";
option java_outer_classname = "GenomicsDBImportConfiguration";

import "genomicsdb_coordinates.proto";
import "genomicsdb_vid_mapping.proto";
import "genomicsdb_callsets_mapping.proto";

// One column partition. Only `begin` is mandatory.
message Partition {
  required GenomicsDBColumn begin = 1;
  optional string workspace = 2;

  // The array is identified either by an explicit name or by asking for the
  // name to be generated from the partition bounds.
  oneof array {
    string array_name = 3;
    bool generate_array_name_from_partition_bounds = 4;
  }

  optional string vcf_output_filename = 5;
  optional GenomicsDBColumn end = 6;
}

// Import (loader) configuration. Field numbers are part of the wire format
// and are intentionally not sequential; do not renumber them.
message ImportConfiguration {
  required int64 size_per_column_partition = 7 [default = 16384];
  optional bool row_based_partitioning = 1 [default = false];
  optional bool produce_combined_vcf = 2 [default = false];
  optional bool produce_tiledb_array = 3 [default = true];
  repeated Partition column_partitions = 4;

  // The vid mapping is supplied either as a file name or inline.
  oneof vid_mapping_options {
    string vid_mapping_file = 5;
    VidMappingPB vid_mapping = 25;
  }

  // The callset mapping is supplied either as a file name or inline.
  oneof callset_mapping_options {
    string callset_mapping_file = 6;
    CallsetMappingPB callset_mapping = 26;
  }

  optional bool treat_deletions_as_intervals = 8 [default = true];
  optional int32 num_parallel_vcf_files = 9 [default = 1];
  optional bool delete_and_create_tiledb_array = 10 [default = false];
  optional bool do_ping_pong_buffering = 11 [default = true];
  optional bool offload_vcf_output_processing = 12 [default = true];
  optional bool discard_vcf_index = 13 [default = true];
  optional int64 segment_size = 14 [default = 10485760];
  optional bool compress_tiledb_array = 15 [default = true];
  optional int64 num_cells_per_tile = 16 [default = 1000];
  optional bool fail_if_updating = 17 [default = false];
  optional int32 tiledb_compression_level = 19 [default = -1];
  optional bool consolidate_tiledb_array_after_load = 20 [default = false];
  optional bool disable_synced_writes = 21 [default = true];
  optional bool ignore_cells_not_in_partition = 22;
  optional int64 lb_callset_row_idx = 23 [default = 0];
  optional int64 ub_callset_row_idx = 24;
  optional bool disable_file_locking_in_tiledb = 27 [default = false];
}
/**
 * Note: Variable names are kept aligned with
 * GenomicsDB JSON configuration.
 *
 * To build, use protocol buffer version >3.2.0 and the following command from base directory:
 * $ protoc -Isrc/resources/ --java_out=src/main/java/ src/resources/genomicsdb_vid_mapping.proto
 */

syntax = "proto2";

option java_package = "com.intel.genomicsdb.model";
option java_outer_classname = "GenomicsDBVidMapProto";

// One component of a field's length description: either a variable-length
// descriptor string or a fixed integer length. At most one is set.
message FieldLengthDescriptorComponentPB {
  oneof length_descriptor {
    string variable_length_descriptor = 1;
    int32 fixed_length = 2;
  }
}

// Description of a single field. Only `name` is mandatory.
message GenomicsDBFieldInfo {
  required string name = 1;
  repeated string type = 2;
  repeated string vcf_field_class = 3;
  optional string vcf_type = 4;
  repeated FieldLengthDescriptorComponentPB length = 5;
  repeated string vcf_delimiter = 6;
  optional string VCF_field_combine_operation = 7;
}

// A contig: its name, its length and its offset in the TileDB column space.
message Chromosome {
  required string name = 1;
  required int64 length = 2;
  required int64 tiledb_column_offset = 3;
}

// The full vid mapping: field descriptions plus contig descriptions.
message VidMappingPB {
  repeated GenomicsDBFieldInfo fields = 1;
  repeated Chromosome contigs = 2;
}
-------------------------------------------------------------------------------- /src/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(cpp) 2 | -------------------------------------------------------------------------------- /src/test/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(src) 2 | -------------------------------------------------------------------------------- /src/test/cpp/include/test_mapping_data_loader.h: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef TEST_MAPDATA_LOADER 24 | #define TEST_MAPDATA_LOADER 25 | 26 | #include 27 | #include 28 | #include 29 | #include "headers.h" 30 | #include "vid_mapper_sql.h" 31 | #include 32 | #include "gtest/gtest.h" 33 | 34 | const std::string config_file_name = "/tmp/sql_mapper/sql_mapper_config.txt"; 35 | 36 | 37 | class MappingDataLoaderTester { 38 | private: 39 | std::vector m_contig_idx_to_info; 40 | std::vector> m_contig_begin_2_idx; 41 | std::vector> m_contig_end_2_idx; 42 | std::vector m_row_idx_to_info; 43 | std::unordered_map m_callset_name_to_row_idx; 44 | std::unordered_map m_field_name_to_idx; 45 | std::vector m_field_idx_to_info; 46 | public: 47 | MappingDataLoaderTester(const SQLVidMapperRequest&); 48 | void validate_contig_info(); 49 | void validate_callset_info(); 50 | void validate_field_info(); 51 | ~MappingDataLoaderTester(); 52 | }; 53 | 54 | /** 55 | * Following config parameters are expected in the file: 56 | * /tmp/sql_mapper/sql_mapper_config.txt 57 | * If the file does not exist, then default values are used 58 | * "host_name" - "localhost" 59 | * "user_name" - "postgres" 60 | * "pass_word" - "postgres" 61 | * "db_name" - "gendb" 62 | * "work_space" - "/tmp/sql_mapper/workspace" 63 | * "array_name" - "sql_mapper_test" 64 | */ 65 | class SQLMapperTest : public ::testing::Test { 66 | public: 67 | static MappingDataLoaderTester* loaderTester; 68 | 69 | protected: 70 | static void SetUpTestCase() { 71 | SQLVidMapperRequest request; 72 | std::ifstream config_file(config_file_name); 73 | 74 | if (config_file.is_open()) { 75 | getline(config_file, request.host_name); 76 | getline(config_file, request.user_name); 77 | getline(config_file, request.pass_word); 78 | getline(config_file, request.db_name); 79 | getline(config_file, request.work_space); 80 | getline(config_file, request.array_name); 81 | config_file.close(); 82 | } else { 83 | request.host_name = "localhost"; 84 | request.user_name = "postgres"; 85 | request.pass_word = 
"postgres"; 86 | request.db_name = "gendb"; 87 | request.work_space = "/tmp/sql_mapper/workspace"; 88 | request.array_name = "sql_mapper_test"; 89 | } 90 | 91 | loaderTester = new MappingDataLoaderTester(request); 92 | } 93 | 94 | static void TearDownTestCase() { 95 | delete loaderTester; 96 | loaderTester = NULL; 97 | } 98 | 99 | virtual void SetUp() { 100 | } 101 | 102 | virtual void TearDown() { 103 | } 104 | }; 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /src/test/cpp/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(GTest) 2 | if(GTEST_FOUND) 3 | set(CPP_TEST_SOURCES 4 | main_testall.cc 5 | test_non_diploid_mapper.cc 6 | test_multid_vector.cc 7 | ) 8 | if(LIBDBI_FOUND) 9 | set(CPP_TEST_SOURCES 10 | ${CPP_TEST_SOURCES} 11 | test_mapping_data_loader.cc) 12 | endif() 13 | add_executable(runAllGTests ${CPP_TEST_SOURCES}) 14 | target_link_libraries(runAllGTests ${GTEST_BOTH_LIBRARIES}) 15 | build_GenomicsDB_executable_common(runAllGTests) 16 | add_test(NAME All_GTests COMMAND runAllGTests) 17 | endif() 18 | -------------------------------------------------------------------------------- /src/test/cpp/src/main_testall.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "gtest/gtest.h" 3 | 4 | int main(int argc, char **argv) { 5 | ::testing::InitGoogleTest(&argc, argv); 6 | return(RUN_ALL_TESTS()); 7 | } 8 | -------------------------------------------------------------------------------- /src/test/java/com/intel/genomicsdb/model/GenomicsDBCallsetsMapProtoSpec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the 
"Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | package com.intel.genomicsdb.model; 24 | 25 | import com.intel.genomicsdb.model.GenomicsDBCallsetsMapProto; 26 | import org.testng.Assert; 27 | import org.testng.annotations.Test; 28 | 29 | import java.util.List; 30 | 31 | public final class GenomicsDBCallsetsMapProtoSpec { 32 | 33 | @Test(testName = "Validate callset map protocol buffer") 34 | public void testCallsetMapProto() { 35 | GenomicsDBCallsetsMapProto.CallsetMappingPB.Builder cBuilder = 36 | GenomicsDBCallsetsMapProto.CallsetMappingPB.newBuilder(); 37 | 38 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap.Builder sampleBuilder0 = 39 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap.newBuilder(); 40 | 41 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap sample0 = 42 | sampleBuilder0 43 | .setSampleName("ABC") 44 | .setIdxInFile(0) 45 | .setRowIdx(0) 46 | .build(); 47 | 48 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap.Builder sampleBuilder1 = 49 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap.newBuilder(); 50 | 51 | 
GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap sample1 = 52 | sampleBuilder1 53 | .setSampleName("MNP") 54 | .setIdxInFile(0) 55 | .setRowIdx(1) 56 | .build(); 57 | 58 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap.Builder sampleBuilder2 = 59 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap.newBuilder(); 60 | 61 | GenomicsDBCallsetsMapProto.SampleIDToTileDBIDMap sample2 = 62 | sampleBuilder2 63 | .setSampleName("XYZ") 64 | .setIdxInFile(0) 65 | .setRowIdx(2) 66 | .build(); 67 | 68 | GenomicsDBCallsetsMapProto.CallsetMappingPB callsetMappingPB = 69 | cBuilder 70 | .addCallsets(sample0) 71 | .addCallsets(sample1) 72 | .addCallsets(sample2) 73 | .build(); 74 | 75 | 76 | Assert.assertEquals(callsetMappingPB.isInitialized(), true); 77 | List sampleList = 78 | callsetMappingPB.getCallsetsList(); 79 | 80 | Assert.assertEquals(sample0, sampleList.get(0)); 81 | Assert.assertEquals(sample1, sampleList.get(1)); 82 | Assert.assertEquals(sample2, sampleList.get(2)); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/test/java/com/intel/genomicsdb/model/ImportConfigSpec.java: -------------------------------------------------------------------------------- 1 | package com.intel.genomicsdb.model; 2 | 3 | import htsjdk.tribble.FeatureReader; 4 | import htsjdk.variant.variantcontext.VariantContext; 5 | import htsjdk.variant.vcf.VCFHeaderLine; 6 | import org.testng.Assert; 7 | import org.testng.annotations.Test; 8 | 9 | import java.nio.file.Path; 10 | import java.util.*; 11 | import java.net.URI; 12 | 13 | public class ImportConfigSpec { 14 | 15 | @Test(testName = "should throw an exception when there is an intersection between chromosome intervals", 16 | expectedExceptions = IllegalArgumentException.class, 17 | expectedExceptionsMessageRegExp = "There are multiple intervals sharing same value. This is not allowed. 
" + 18 | "Intervals should be defined without intersections.") 19 | public void shouldThrowExceptionWhenThereIsAnIntersectionBetweenChromosomeIntervals() { 20 | //Given 21 | //When 22 | createBaseImportConfig(true); 23 | 24 | //Then 25 | //Exception is expected 26 | } 27 | 28 | @Test(testName = "should not throw an exception when there are not intersections between chromosome intervals") 29 | public void shouldNotThrowExceptionWhenThereAreIntersectionsButDifferentChromosomes() { 30 | //Given 31 | //When 32 | ImportConfig config = createBaseImportConfig(false); 33 | 34 | //Then 35 | Assert.assertEquals(config.getImportConfiguration().getColumnPartitionsList().size(), 2); 36 | } 37 | 38 | private ImportConfig createBaseImportConfig(final boolean withIntersection) { 39 | String defaultName = "a"; 40 | String secondPartition = withIntersection ? defaultName : "b"; 41 | GenomicsDBImportConfiguration.ImportConfiguration configuration = GenomicsDBImportConfiguration.ImportConfiguration.newBuilder() 42 | .addColumnPartitions(GenomicsDBImportConfiguration.Partition.newBuilder().setBegin( 43 | Coordinates.GenomicsDBColumn.newBuilder().setContigPosition( 44 | Coordinates.ContigPosition.newBuilder().setContig("a").setPosition(1).build() 45 | ).build() 46 | ).setEnd( 47 | Coordinates.GenomicsDBColumn.newBuilder().setContigPosition( 48 | Coordinates.ContigPosition.newBuilder().setContig("a").setPosition(2).build() 49 | ).build() 50 | ).build() 51 | ) 52 | .addColumnPartitions(GenomicsDBImportConfiguration.Partition.newBuilder().setBegin( 53 | Coordinates.GenomicsDBColumn.newBuilder().setContigPosition( 54 | Coordinates.ContigPosition.newBuilder().setContig(secondPartition).setPosition(1).build() 55 | ).build() 56 | ).setEnd( 57 | Coordinates.GenomicsDBColumn.newBuilder().setContigPosition( 58 | Coordinates.ContigPosition.newBuilder().setContig(secondPartition).setPosition(2).build() 59 | ).build() 60 | ).build() 61 | ) 62 | .setSizePerColumnPartition(16000).build(); 63 | boolean 
validateSampleToReaderMap = true; 64 | boolean passAsVcf = true; 65 | int batchSize = 1000; 66 | Set mergedHeader = new HashSet(); 67 | Map sampleNameToVcfPath = new TreeMap<>(); 68 | ImportConfig.Func, Integer, Integer, 69 | Map>> sampleToReaderMap = (a, b, c) -> new TreeMap<>(); 70 | 71 | return new ImportConfig(configuration, validateSampleToReaderMap, passAsVcf, batchSize, mergedHeader, 72 | sampleNameToVcfPath, sampleToReaderMap); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/test/java/com/intel/genomicsdb/reader/ChrArrayFolderComparatorSpec.java: -------------------------------------------------------------------------------- 1 | package com.intel.genomicsdb.reader; 2 | 3 | 4 | import org.testng.Assert; 5 | import org.testng.annotations.Test; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import static com.intel.genomicsdb.Constants.CHROMOSOME_INTERVAL_FOLDER; 11 | 12 | public class ChrArrayFolderComparatorSpec { 13 | @Test(testName = "should compare first by chromosome and then interval start") 14 | public void shouldCompareFirstByChromosomeAndThenIntervalStart() { 15 | //Given 16 | List arrayList = new ArrayList<>(); 17 | arrayList.add(String.format(CHROMOSOME_INTERVAL_FOLDER, "1", 100, 200)); 18 | arrayList.add(String.format(CHROMOSOME_INTERVAL_FOLDER, "1", 201, 300)); 19 | arrayList.add(String.format(CHROMOSOME_INTERVAL_FOLDER, "2", 100, 200)); 20 | arrayList.add(String.format(CHROMOSOME_INTERVAL_FOLDER, "2", 201, 300)); 21 | arrayList.add(String.format(CHROMOSOME_INTERVAL_FOLDER, "1", 1000, 2000)); 22 | 23 | //When 24 | arrayList.sort(new ChrArrayFolderComparator()); 25 | 26 | //Then 27 | Assert.assertEquals(arrayList.get(2), String.format(CHROMOSOME_INTERVAL_FOLDER, "1", 1000, 2000)); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /tests/golden_outputs/t0_1_2_calls_at_0_with_PL_only: 
-------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 0, 1000000000 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 12140, 12294 ], 9 | "genomic_interval": { "1" : [ 12141, 12295 ] }, 10 | "fields": { 11 | "REF": "C", 12 | "ALT": [ "" ], 13 | "PL": [ 0, 0, 0 ], 14 | "GT": [ 0, 0 ] 15 | } 16 | }, 17 | { 18 | "row": 1, 19 | "interval": [ 12144, 12276 ], 20 | "genomic_interval": { "1" : [ 12145, 12277 ] }, 21 | "fields": { 22 | "REF": "C", 23 | "ALT": [ "" ], 24 | "PL": [ 0, 0, 0 ], 25 | "GT": [ 0, 0 ] 26 | } 27 | }, 28 | { 29 | "row": 0, 30 | "interval": [ 17384, 17384 ], 31 | "genomic_interval": { "1" : [ 17385, 17385 ] }, 32 | "fields": { 33 | "REF": "G", 34 | "ALT": [ "A", "" ], 35 | "PL": [ 504, 0, 9807, 678, 1870, 2548 ], 36 | "GT": [ 0, 1 ] 37 | } 38 | }, 39 | { 40 | "row": 1, 41 | "interval": [ 17384, 17384 ], 42 | "genomic_interval": { "1" : [ 17385, 17385 ] }, 43 | "fields": { 44 | "REF": "G", 45 | "ALT": [ "T", "" ], 46 | "PL": [ 3336, 358, 0, 4536, 958, 7349 ], 47 | "GT": [ 1, 1 ] 48 | } 49 | }, 50 | { 51 | "row": 2, 52 | "interval": [ 17384, 17384 ], 53 | "genomic_interval": { "1" : [ 17385, 17385 ] }, 54 | "fields": { 55 | "REF": "G", 56 | "ALT": [ "A", "" ], 57 | "PL": [ 1018, 0, 1116, 1137, 1224, 2361 ], 58 | "GT": [ 0, 1 ] 59 | } 60 | } 61 | ] 62 | } 63 | ] 64 | } 65 | -------------------------------------------------------------------------------- /tests/golden_outputs/t0_1_2_calls_at_12100: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /tests/golden_outputs/t0_1_2_calls_at_12100_12141: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 12141, 12141 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | 
"interval": [ 12140, 12294 ], 9 | "genomic_interval": { "1" : [ 12141, 12295 ] }, 10 | "fields": { 11 | "REF": "C", 12 | "ALT": [ "" ], 13 | "GT": [ 0, 0 ], 14 | "GQ": 0, 15 | "PL": [ 0, 0, 0 ], 16 | "DP_FORMAT": 2, 17 | "MIN_DP": 0 18 | } 19 | } 20 | ] 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /tests/golden_outputs/t0_1_2_calls_at_12100_12141_12150: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 12141, 12141 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 12140, 12294 ], 9 | "genomic_interval": { "1" : [ 12141, 12295 ] }, 10 | "fields": { 11 | "REF": "C", 12 | "ALT": [ "" ], 13 | "GT": [ 0, 0 ], 14 | "GQ": 0, 15 | "PL": [ 0, 0, 0 ], 16 | "DP_FORMAT": 2, 17 | "MIN_DP": 0 18 | } 19 | } 20 | ] 21 | }, 22 | { 23 | "query_interval": [ 12150, 12150 ], 24 | "variant_calls": [ 25 | { 26 | "row": 0, 27 | "interval": [ 12140, 12294 ], 28 | "genomic_interval": { "1" : [ 12141, 12295 ] }, 29 | "fields": { 30 | "REF": "C", 31 | "ALT": [ "" ], 32 | "GT": [ 0, 0 ], 33 | "GQ": 0, 34 | "PL": [ 0, 0, 0 ], 35 | "DP_FORMAT": 2, 36 | "MIN_DP": 0 37 | } 38 | }, 39 | { 40 | "row": 1, 41 | "interval": [ 12144, 12276 ], 42 | "genomic_interval": { "1" : [ 12145, 12277 ] }, 43 | "fields": { 44 | "REF": "C", 45 | "ALT": [ "" ], 46 | "GT": [ 0, 0 ], 47 | "GQ": 0, 48 | "PL": [ 0, 0, 0 ], 49 | "DP_FORMAT": 3, 50 | "MIN_DP": 0 51 | } 52 | } 53 | ] 54 | } 55 | ] 56 | } 57 | -------------------------------------------------------------------------------- /tests/golden_outputs/t0_1_2_calls_at_12100_12141_to_12150: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 12141, 12150 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 12140, 12294 ], 9 | "genomic_interval": { "1" : [ 12141, 12295 ] }, 10 | "fields": { 11 | "REF": "C", 
12 | "ALT": [ "" ], 13 | "GT": [ 0, 0 ], 14 | "GQ": 0, 15 | "PL": [ 0, 0, 0 ], 16 | "DP_FORMAT": 2, 17 | "MIN_DP": 0 18 | } 19 | }, 20 | { 21 | "row": 1, 22 | "interval": [ 12144, 12276 ], 23 | "genomic_interval": { "1" : [ 12145, 12277 ] }, 24 | "fields": { 25 | "REF": "C", 26 | "ALT": [ "" ], 27 | "GT": [ 0, 0 ], 28 | "GQ": 0, 29 | "PL": [ 0, 0, 0 ], 30 | "DP_FORMAT": 3, 31 | "MIN_DP": 0 32 | } 33 | } 34 | ] 35 | } 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /tests/golden_outputs/t0_with_missing_PL_SB_fields_t1_calls.json: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 0, 1000000000 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 12140, 12294 ], 9 | "genomic_interval": { "1" : [ 12141, 12295 ] }, 10 | "fields": { 11 | "REF": "C", 12 | "ALT": [ "" ], 13 | "GT": [ 0, 0 ], 14 | "GQ": 0, 15 | "PL": [ 0, 0, 0 ], 16 | "DP_FORMAT": 2, 17 | "MIN_DP": 0 18 | } 19 | }, 20 | { 21 | "row": 1, 22 | "interval": [ 12144, 12276 ], 23 | "genomic_interval": { "1" : [ 12145, 12277 ] }, 24 | "fields": { 25 | "REF": "C", 26 | "ALT": [ "" ], 27 | "GT": [ 0, 0 ], 28 | "GQ": 0, 29 | "PL": [ 0, 0, 0 ], 30 | "DP_FORMAT": 3, 31 | "MIN_DP": 0 32 | } 33 | }, 34 | { 35 | "row": 0, 36 | "interval": [ 17384, 17384 ], 37 | "genomic_interval": { "1" : [ 17385, 17385 ] }, 38 | "fields": { 39 | "REF": "G", 40 | "ALT": [ "A", "" ], 41 | "MQ": 31.72, 42 | "RAW_MQ": 5.5, 43 | "MQ0": 8, 44 | "ClippingRankSum": -1.859, 45 | "MQRankSum": -0.329, 46 | "ReadPosRankSum": 0.005, 47 | "GT": [ 0, 1 ], 48 | "GQ": 99, 49 | "AD": [ 58, 22, 17 ], 50 | "DP_FORMAT": 80, 51 | "PID": "17385_G_A", 52 | "PGT": "0|1", 53 | "BaseQRankSum": -2.096 54 | } 55 | }, 56 | { 57 | "row": 1, 58 | "interval": [ 17384, 17384 ], 59 | "genomic_interval": { "1" : [ 17385, 17385 ] }, 60 | "fields": { 61 | "REF": "G", 62 | "ALT": [ "T", "" ], 63 | "MQ": 29.82, 64 | "RAW_MQ": 
2.5, 65 | "MQ0": 3, 66 | "ClippingRankSum": 0.555, 67 | "MQRankSum": -1.369, 68 | "ReadPosRankSum": -0.101, 69 | "DP": 120, 70 | "GT": [ 1, 1 ], 71 | "GQ": 99, 72 | "SB": [ 0, 0, 0, 0 ], 73 | "AD": [ 0, 120, 37 ], 74 | "PL": [ 3336, 358, 0, 4536, 958, 7349 ], 75 | "DP_FORMAT": 120, 76 | "PID": "17385_G_T", 77 | "PGT": "0|1", 78 | "BaseQRankSum": -2.074 79 | } 80 | } 81 | ] 82 | } 83 | ] 84 | } 85 | -------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_calls_at_0: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 0, 1000000000 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 8029499, 8029501 ], 9 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 10 | "fields": { 11 | "REF": "TGG", 12 | "ALT": [ "T", "" ], 13 | "MQ": 55.72, 14 | "MQ0": 0, 15 | "ClippingRankSum": -1.067, 16 | "MQRankSum": 0.055555, 17 | "DP": 9, 18 | "GT": [ 0, 1 ], 19 | "GQ": 40, 20 | "SB": [ 0, 0, 0, 0 ], 21 | "AD": [ 6, 2, 0 ], 22 | "PL": [ 40, 0, 197, 58, 203, 262 ], 23 | "DP_FORMAT": 8, 24 | "BaseQRankSum": 1.067 25 | } 26 | }, 27 | { 28 | "row": 1, 29 | "interval": [ 8029499, 8029501 ], 30 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 31 | "fields": { 32 | "REF": "TGG", 33 | "ALT": [ "T", "TG", "" ], 34 | "MQ": 59.59, 35 | "MQ0": 0, 36 | "ClippingRankSum": -2.336, 37 | "MQRankSum": 1.052, 38 | "ReadPosRankSum": 1.611, 39 | "DP": 103, 40 | "GT": [ 0, 2 ], 41 | "GQ": 99, 42 | "SB": [ 5, 40, 3, 29 ], 43 | "AD": [ 45, 1, 32, 0 ], 44 | "PL": [ 733, 872, 3465, 0, 1789, 1573, 862, 2677, 1670, 2532 ], 45 | "DP_FORMAT": 78, 46 | "BaseQRankSum": 1.82 47 | } 48 | }, 49 | { 50 | "row": 2, 51 | "interval": [ 8029499, 8029500 ], 52 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 53 | "fields": { 54 | "REF": "TG", 55 | "ALT": [ "T", "" ], 56 | "MQ": 60.25, 57 | "MQ0": 0, 58 | "ClippingRankSum": -1.647, 59 | "MQRankSum": 1.018, 60 | 
"ReadPosRankSum": 0.012, 61 | "DP": 85, 62 | "GT": [ 0, 1 ], 63 | "GQ": 89, 64 | "SB": [ 5, 42, 1, 9 ], 65 | "AD": [ 47, 10, 0 ], 66 | "PL": [ 89, 0, 1086, 229, 1119, 1348 ], 67 | "DP_FORMAT": 57, 68 | "BaseQRankSum": -0.703 69 | } 70 | } 71 | ] 72 | } 73 | ] 74 | } 75 | -------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_calls_at_0_phased_GT: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 0, 1000000000 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 8029499, 8029501 ], 9 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 10 | "fields": { 11 | "REF": "TGG", 12 | "ALT": [ "T", "" ], 13 | "MQ": 55.72, 14 | "MQ0": 0, 15 | "ClippingRankSum": -1.067, 16 | "MQRankSum": 0.055555, 17 | "DP": 9, 18 | "GT": [ 0, 0, 1 ], 19 | "GQ": 40, 20 | "SB": [ 0, 0, 0, 0 ], 21 | "AD": [ 6, 2, 0 ], 22 | "PL": [ 40, 0, 197, 58, 203, 262 ], 23 | "DP_FORMAT": 8, 24 | "BaseQRankSum": 1.067 25 | } 26 | }, 27 | { 28 | "row": 1, 29 | "interval": [ 8029499, 8029501 ], 30 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 31 | "fields": { 32 | "REF": "TGG", 33 | "ALT": [ "T", "TG", "" ], 34 | "MQ": 59.59, 35 | "MQ0": 0, 36 | "ClippingRankSum": -2.336, 37 | "MQRankSum": 1.052, 38 | "ReadPosRankSum": 1.611, 39 | "DP": 103, 40 | "GT": [ 0, 0, 2 ], 41 | "GQ": 99, 42 | "SB": [ 5, 40, 3, 29 ], 43 | "AD": [ 45, 1, 32, 0 ], 44 | "PL": [ 733, 872, 3465, 0, 1789, 1573, 862, 2677, 1670, 2532 ], 45 | "DP_FORMAT": 78, 46 | "BaseQRankSum": 1.82 47 | } 48 | }, 49 | { 50 | "row": 2, 51 | "interval": [ 8029499, 8029500 ], 52 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 53 | "fields": { 54 | "REF": "TG", 55 | "ALT": [ "T", "" ], 56 | "MQ": 60.25, 57 | "MQ0": 0, 58 | "ClippingRankSum": -1.647, 59 | "MQRankSum": 1.018, 60 | "ReadPosRankSum": 0.012, 61 | "DP": 85, 62 | "GT": [ 0, 0, 1 ], 63 | "GQ": 89, 64 | "SB": [ 5, 42, 1, 9 ], 65 | 
"AD": [ 47, 10, 0 ], 66 | "PL": [ 89, 0, 1086, 229, 1119, 1348 ], 67 | "DP_FORMAT": 57, 68 | "BaseQRankSum": -0.703 69 | } 70 | } 71 | ] 72 | } 73 | ] 74 | } 75 | -------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_calls_at_8029500: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 8029500, 1000000000 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 8029499, 8029501 ], 9 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 10 | "fields": { 11 | "REF": "TGG", 12 | "ALT": [ "T", "" ], 13 | "MQ": 55.72, 14 | "MQ0": 0, 15 | "ClippingRankSum": -1.067, 16 | "MQRankSum": 0.055555, 17 | "DP": 9, 18 | "GT": [ 0, 1 ], 19 | "GQ": 40, 20 | "SB": [ 0, 0, 0, 0 ], 21 | "AD": [ 6, 2, 0 ], 22 | "PL": [ 40, 0, 197, 58, 203, 262 ], 23 | "DP_FORMAT": 8, 24 | "BaseQRankSum": 1.067 25 | } 26 | }, 27 | { 28 | "row": 1, 29 | "interval": [ 8029499, 8029501 ], 30 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 31 | "fields": { 32 | "REF": "TGG", 33 | "ALT": [ "T", "TG", "" ], 34 | "MQ": 59.59, 35 | "MQ0": 0, 36 | "ClippingRankSum": -2.336, 37 | "MQRankSum": 1.052, 38 | "ReadPosRankSum": 1.611, 39 | "DP": 103, 40 | "GT": [ 0, 2 ], 41 | "GQ": 99, 42 | "SB": [ 5, 40, 3, 29 ], 43 | "AD": [ 45, 1, 32, 0 ], 44 | "PL": [ 733, 872, 3465, 0, 1789, 1573, 862, 2677, 1670, 2532 ], 45 | "DP_FORMAT": 78, 46 | "BaseQRankSum": 1.82 47 | } 48 | }, 49 | { 50 | "row": 2, 51 | "interval": [ 8029499, 8029500 ], 52 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 53 | "fields": { 54 | "REF": "TG", 55 | "ALT": [ "T", "" ], 56 | "MQ": 60.25, 57 | "MQ0": 0, 58 | "ClippingRankSum": -1.647, 59 | "MQRankSum": 1.018, 60 | "ReadPosRankSum": 0.012, 61 | "DP": 85, 62 | "GT": [ 0, 1 ], 63 | "GQ": 89, 64 | "SB": [ 5, 42, 1, 9 ], 65 | "AD": [ 47, 10, 0 ], 66 | "PL": [ 89, 0, 1086, 229, 1119, 1348 ], 67 | "DP_FORMAT": 57, 68 | "BaseQRankSum": -0.703 69 | 
} 70 | } 71 | ] 72 | } 73 | ] 74 | } 75 | -------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_calls_at_8029500_phased_GT: -------------------------------------------------------------------------------- 1 | { 2 | "variant_calls": [ 3 | { 4 | "query_interval": [ 8029500, 1000000000 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 8029499, 8029501 ], 9 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 10 | "fields": { 11 | "REF": "TGG", 12 | "ALT": [ "T", "" ], 13 | "MQ": 55.72, 14 | "MQ0": 0, 15 | "ClippingRankSum": -1.067, 16 | "MQRankSum": 0.055555, 17 | "DP": 9, 18 | "GT": [ 0, 0, 1 ], 19 | "GQ": 40, 20 | "SB": [ 0, 0, 0, 0 ], 21 | "AD": [ 6, 2, 0 ], 22 | "PL": [ 40, 0, 197, 58, 203, 262 ], 23 | "DP_FORMAT": 8, 24 | "BaseQRankSum": 1.067 25 | } 26 | }, 27 | { 28 | "row": 1, 29 | "interval": [ 8029499, 8029501 ], 30 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 31 | "fields": { 32 | "REF": "TGG", 33 | "ALT": [ "T", "TG", "" ], 34 | "MQ": 59.59, 35 | "MQ0": 0, 36 | "ClippingRankSum": -2.336, 37 | "MQRankSum": 1.052, 38 | "ReadPosRankSum": 1.611, 39 | "DP": 103, 40 | "GT": [ 0, 0, 2 ], 41 | "GQ": 99, 42 | "SB": [ 5, 40, 3, 29 ], 43 | "AD": [ 45, 1, 32, 0 ], 44 | "PL": [ 733, 872, 3465, 0, 1789, 1573, 862, 2677, 1670, 2532 ], 45 | "DP_FORMAT": 78, 46 | "BaseQRankSum": 1.82 47 | } 48 | }, 49 | { 50 | "row": 2, 51 | "interval": [ 8029499, 8029500 ], 52 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 53 | "fields": { 54 | "REF": "TG", 55 | "ALT": [ "T", "" ], 56 | "MQ": 60.25, 57 | "MQ0": 0, 58 | "ClippingRankSum": -1.647, 59 | "MQRankSum": 1.018, 60 | "ReadPosRankSum": 0.012, 61 | "DP": 85, 62 | "GT": [ 0, 0, 1 ], 63 | "GQ": 89, 64 | "SB": [ 5, 42, 1, 9 ], 65 | "AD": [ 47, 10, 0 ], 66 | "PL": [ 89, 0, 1086, 229, 1119, 1348 ], 67 | "DP_FORMAT": 57, 68 | "BaseQRankSum": -0.703 69 | } 70 | } 71 | ] 72 | } 73 | ] 74 | } 75 | 
-------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_variants_at_0: -------------------------------------------------------------------------------- 1 | { 2 | "variants": [ 3 | { 4 | "interval": [ 8029499, 8029501 ], 5 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 6 | "common_fields" : { 7 | 8 | }, 9 | "variant_calls": [ 10 | { 11 | "row": 0, 12 | "interval": [ 8029499, 8029501 ], 13 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 14 | "fields": { 15 | "REF": "TGG", 16 | "ALT": [ "T","" ], 17 | "MQ": [ 55.720001 ], 18 | "MQ0": [ 0 ], 19 | "ClippingRankSum": [ -1.067000 ], 20 | "MQRankSum": [ 0.055555 ], 21 | "DP": [ 9 ], 22 | "GT": [ 0,1 ], 23 | "GQ": [ 40 ], 24 | "SB": [ 0,0,0,0 ], 25 | "AD": [ 6,2,0 ], 26 | "PL": [ 40,0,197,58,203,262 ], 27 | "DP_FORMAT": [ 8 ], 28 | "BaseQRankSum": [ 1.067000 ] 29 | } 30 | } 31 | ] 32 | }, 33 | { 34 | "interval": [ 8029499, 8029501 ], 35 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 36 | "common_fields" : { 37 | 38 | }, 39 | "variant_calls": [ 40 | { 41 | "row": 1, 42 | "interval": [ 8029499, 8029501 ], 43 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 44 | "fields": { 45 | "REF": "TGG", 46 | "ALT": [ "T","TG","" ], 47 | "MQ": [ 59.590000 ], 48 | "MQ0": [ 0 ], 49 | "ClippingRankSum": [ -2.336000 ], 50 | "MQRankSum": [ 1.052000 ], 51 | "ReadPosRankSum": [ 1.611000 ], 52 | "DP": [ 103 ], 53 | "GT": [ 0,2 ], 54 | "GQ": [ 99 ], 55 | "SB": [ 5,40,3,29 ], 56 | "AD": [ 45,1,32,0 ], 57 | "PL": [ 733,872,3465,0,1789,1573,862,2677,1670,2532 ], 58 | "DP_FORMAT": [ 78 ], 59 | "BaseQRankSum": [ 1.820000 ] 60 | } 61 | } 62 | ] 63 | }, 64 | { 65 | "interval": [ 8029499, 8029500 ], 66 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 67 | "common_fields" : { 68 | 69 | }, 70 | "variant_calls": [ 71 | { 72 | "row": 2, 73 | "interval": [ 8029499, 8029500 ], 74 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 75 | "fields": { 76 | "REF": "TG", 77 | "ALT": 
[ "T","" ], 78 | "MQ": [ 60.250000 ], 79 | "MQ0": [ 0 ], 80 | "ClippingRankSum": [ -1.647000 ], 81 | "MQRankSum": [ 1.018000 ], 82 | "ReadPosRankSum": [ 0.012000 ], 83 | "DP": [ 85 ], 84 | "GT": [ 0,1 ], 85 | "GQ": [ 89 ], 86 | "SB": [ 5,42,1,9 ], 87 | "AD": [ 47,10,0 ], 88 | "PL": [ 89,0,1086,229,1119,1348 ], 89 | "DP_FORMAT": [ 57 ], 90 | "BaseQRankSum": [ -0.703000 ] 91 | } 92 | } 93 | ] 94 | } 95 | ] 96 | } 97 | -------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_variants_at_0_phased_GT: -------------------------------------------------------------------------------- 1 | { 2 | "variants": [ 3 | { 4 | "interval": [ 8029499, 8029501 ], 5 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 6 | "common_fields" : { 7 | 8 | }, 9 | "variant_calls": [ 10 | { 11 | "row": 0, 12 | "interval": [ 8029499, 8029501 ], 13 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 14 | "fields": { 15 | "REF": "TGG", 16 | "ALT": [ "T","" ], 17 | "MQ": [ 55.720001 ], 18 | "MQ0": [ 0 ], 19 | "ClippingRankSum": [ -1.067000 ], 20 | "MQRankSum": [ 0.055555 ], 21 | "DP": [ 9 ], 22 | "GT": [ 0,0,1 ], 23 | "GQ": [ 40 ], 24 | "SB": [ 0,0,0,0 ], 25 | "AD": [ 6,2,0 ], 26 | "PL": [ 40,0,197,58,203,262 ], 27 | "DP_FORMAT": [ 8 ], 28 | "BaseQRankSum": [ 1.067000 ] 29 | } 30 | } 31 | ] 32 | }, 33 | { 34 | "interval": [ 8029499, 8029501 ], 35 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 36 | "common_fields" : { 37 | 38 | }, 39 | "variant_calls": [ 40 | { 41 | "row": 1, 42 | "interval": [ 8029499, 8029501 ], 43 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 44 | "fields": { 45 | "REF": "TGG", 46 | "ALT": [ "T","TG","" ], 47 | "MQ": [ 59.590000 ], 48 | "MQ0": [ 0 ], 49 | "ClippingRankSum": [ -2.336000 ], 50 | "MQRankSum": [ 1.052000 ], 51 | "ReadPosRankSum": [ 1.611000 ], 52 | "DP": [ 103 ], 53 | "GT": [ 0,0,2 ], 54 | "GQ": [ 99 ], 55 | "SB": [ 5,40,3,29 ], 56 | "AD": [ 45,1,32,0 ], 57 | "PL": [ 
733,872,3465,0,1789,1573,862,2677,1670,2532 ], 58 | "DP_FORMAT": [ 78 ], 59 | "BaseQRankSum": [ 1.820000 ] 60 | } 61 | } 62 | ] 63 | }, 64 | { 65 | "interval": [ 8029499, 8029500 ], 66 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 67 | "common_fields" : { 68 | 69 | }, 70 | "variant_calls": [ 71 | { 72 | "row": 2, 73 | "interval": [ 8029499, 8029500 ], 74 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 75 | "fields": { 76 | "REF": "TG", 77 | "ALT": [ "T","" ], 78 | "MQ": [ 60.250000 ], 79 | "MQ0": [ 0 ], 80 | "ClippingRankSum": [ -1.647000 ], 81 | "MQRankSum": [ 1.018000 ], 82 | "ReadPosRankSum": [ 0.012000 ], 83 | "DP": [ 85 ], 84 | "GT": [ 0,0,1 ], 85 | "GQ": [ 89 ], 86 | "SB": [ 5,42,1,9 ], 87 | "AD": [ 47,10,0 ], 88 | "PL": [ 89,0,1086,229,1119,1348 ], 89 | "DP_FORMAT": [ 57 ], 90 | "BaseQRankSum": [ -0.703000 ] 91 | } 92 | } 93 | ] 94 | } 95 | ] 96 | } 97 | -------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_variants_at_8029500: -------------------------------------------------------------------------------- 1 | { 2 | "variants": [ 3 | { 4 | "interval": [ 8029499, 8029501 ], 5 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 6 | "common_fields" : { 7 | 8 | }, 9 | "variant_calls": [ 10 | { 11 | "row": 0, 12 | "interval": [ 8029499, 8029501 ], 13 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 14 | "fields": { 15 | "REF": "TGG", 16 | "ALT": [ "T","" ], 17 | "MQ": [ 55.720001 ], 18 | "MQ0": [ 0 ], 19 | "ClippingRankSum": [ -1.067000 ], 20 | "MQRankSum": [ 0.055555 ], 21 | "DP": [ 9 ], 22 | "GT": [ 0,1 ], 23 | "GQ": [ 40 ], 24 | "SB": [ 0,0,0,0 ], 25 | "AD": [ 6,2,0 ], 26 | "PL": [ 40,0,197,58,203,262 ], 27 | "DP_FORMAT": [ 8 ], 28 | "BaseQRankSum": [ 1.067000 ] 29 | } 30 | } 31 | ] 32 | }, 33 | { 34 | "interval": [ 8029499, 8029501 ], 35 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 36 | "common_fields" : { 37 | 38 | }, 39 | "variant_calls": [ 40 | { 41 | "row": 1, 42 | 
"interval": [ 8029499, 8029501 ], 43 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 44 | "fields": { 45 | "REF": "TGG", 46 | "ALT": [ "T","TG","" ], 47 | "MQ": [ 59.590000 ], 48 | "MQ0": [ 0 ], 49 | "ClippingRankSum": [ -2.336000 ], 50 | "MQRankSum": [ 1.052000 ], 51 | "ReadPosRankSum": [ 1.611000 ], 52 | "DP": [ 103 ], 53 | "GT": [ 0,2 ], 54 | "GQ": [ 99 ], 55 | "SB": [ 5,40,3,29 ], 56 | "AD": [ 45,1,32,0 ], 57 | "PL": [ 733,872,3465,0,1789,1573,862,2677,1670,2532 ], 58 | "DP_FORMAT": [ 78 ], 59 | "BaseQRankSum": [ 1.820000 ] 60 | } 61 | } 62 | ] 63 | }, 64 | { 65 | "interval": [ 8029499, 8029500 ], 66 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 67 | "common_fields" : { 68 | 69 | }, 70 | "variant_calls": [ 71 | { 72 | "row": 2, 73 | "interval": [ 8029499, 8029500 ], 74 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 75 | "fields": { 76 | "REF": "TG", 77 | "ALT": [ "T","" ], 78 | "MQ": [ 60.250000 ], 79 | "MQ0": [ 0 ], 80 | "ClippingRankSum": [ -1.647000 ], 81 | "MQRankSum": [ 1.018000 ], 82 | "ReadPosRankSum": [ 0.012000 ], 83 | "DP": [ 85 ], 84 | "GT": [ 0,1 ], 85 | "GQ": [ 89 ], 86 | "SB": [ 5,42,1,9 ], 87 | "AD": [ 47,10,0 ], 88 | "PL": [ 89,0,1086,229,1119,1348 ], 89 | "DP_FORMAT": [ 57 ], 90 | "BaseQRankSum": [ -0.703000 ] 91 | } 92 | } 93 | ] 94 | } 95 | ] 96 | } 97 | -------------------------------------------------------------------------------- /tests/golden_outputs/t6_7_8_variants_at_8029500_phased_GT: -------------------------------------------------------------------------------- 1 | { 2 | "variants": [ 3 | { 4 | "interval": [ 8029499, 8029501 ], 5 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 6 | "common_fields" : { 7 | 8 | }, 9 | "variant_calls": [ 10 | { 11 | "row": 0, 12 | "interval": [ 8029499, 8029501 ], 13 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 14 | "fields": { 15 | "REF": "TGG", 16 | "ALT": [ "T","" ], 17 | "MQ": [ 55.720001 ], 18 | "MQ0": [ 0 ], 19 | "ClippingRankSum": [ -1.067000 ], 20 | 
"MQRankSum": [ 0.055555 ], 21 | "DP": [ 9 ], 22 | "GT": [ 0,0,1 ], 23 | "GQ": [ 40 ], 24 | "SB": [ 0,0,0,0 ], 25 | "AD": [ 6,2,0 ], 26 | "PL": [ 40,0,197,58,203,262 ], 27 | "DP_FORMAT": [ 8 ], 28 | "BaseQRankSum": [ 1.067000 ] 29 | } 30 | } 31 | ] 32 | }, 33 | { 34 | "interval": [ 8029499, 8029501 ], 35 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 36 | "common_fields" : { 37 | 38 | }, 39 | "variant_calls": [ 40 | { 41 | "row": 1, 42 | "interval": [ 8029499, 8029501 ], 43 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 44 | "fields": { 45 | "REF": "TGG", 46 | "ALT": [ "T","TG","" ], 47 | "MQ": [ 59.590000 ], 48 | "MQ0": [ 0 ], 49 | "ClippingRankSum": [ -2.336000 ], 50 | "MQRankSum": [ 1.052000 ], 51 | "ReadPosRankSum": [ 1.611000 ], 52 | "DP": [ 103 ], 53 | "GT": [ 0,0,2 ], 54 | "GQ": [ 99 ], 55 | "SB": [ 5,40,3,29 ], 56 | "AD": [ 45,1,32,0 ], 57 | "PL": [ 733,872,3465,0,1789,1573,862,2677,1670,2532 ], 58 | "DP_FORMAT": [ 78 ], 59 | "BaseQRankSum": [ 1.820000 ] 60 | } 61 | } 62 | ] 63 | }, 64 | { 65 | "interval": [ 8029499, 8029500 ], 66 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 67 | "common_fields" : { 68 | 69 | }, 70 | "variant_calls": [ 71 | { 72 | "row": 2, 73 | "interval": [ 8029499, 8029500 ], 74 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 75 | "fields": { 76 | "REF": "TG", 77 | "ALT": [ "T","" ], 78 | "MQ": [ 60.250000 ], 79 | "MQ0": [ 0 ], 80 | "ClippingRankSum": [ -1.647000 ], 81 | "MQRankSum": [ 1.018000 ], 82 | "ReadPosRankSum": [ 0.012000 ], 83 | "DP": [ 85 ], 84 | "GT": [ 0,0,1 ], 85 | "GQ": [ 89 ], 86 | "SB": [ 5,42,1,9 ], 87 | "AD": [ 47,10,0 ], 88 | "PL": [ 89,0,1086,229,1119,1348 ], 89 | "DP_FORMAT": [ 57 ], 90 | "BaseQRankSum": [ -0.703000 ] 91 | } 92 | } 93 | ] 94 | } 95 | ] 96 | } 97 | -------------------------------------------------------------------------------- /tests/golden_outputs/test_new_fields_MLEAC_only.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"variant_calls": [ 3 | { 4 | "query_interval": [ 0, 1000000000 ], 5 | "variant_calls": [ 6 | { 7 | "row": 0, 8 | "interval": [ 8029499, 8029501 ], 9 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 10 | "fields": { 11 | "REF": "TGG", 12 | "ALT": [ "T", "" ], 13 | "MLEAC": [ 1, 0 ] 14 | } 15 | }, 16 | { 17 | "row": 1, 18 | "interval": [ 8029499, 8029501 ], 19 | "genomic_interval": { "1" : [ 8029500, 8029502 ] }, 20 | "fields": { 21 | "REF": "TGG", 22 | "ALT": [ "T", "TG", "" ], 23 | "MLEAC": [ 0, 1, 0 ] 24 | } 25 | }, 26 | { 27 | "row": 2, 28 | "interval": [ 8029499, 8029500 ], 29 | "genomic_interval": { "1" : [ 8029500, 8029501 ] }, 30 | "fields": { 31 | "REF": "TG", 32 | "ALT": [ "T", "" ], 33 | "MLEAC": [ 1, 0 ] 34 | } 35 | } 36 | ] 37 | } 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /tests/hostfile: -------------------------------------------------------------------------------- 1 | localhost 2 | -------------------------------------------------------------------------------- /tests/inputs/callsets/info_ops.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "filename": "inputs/vcfs/info_op0.vcf.gz" 7 | }, 8 | "HG01958" : { 9 | "row_idx" : 1, 10 | "idx_in_file": 0, 11 | "filename": "inputs/vcfs/info_op1.vcf.gz" 12 | }, 13 | "HG01530" : { 14 | "row_idx" : 2, 15 | "idx_in_file": 0, 16 | "filename": "inputs/vcfs/info_op2.vcf.gz" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/inputs/callsets/min_PL_spanning_deletion.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG001958" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "filename": "inputs/vcfs/min_PL_spanning_deletion.vcf.gz" 7 | } 8 | } 9 | } 10 | 
-------------------------------------------------------------------------------- /tests/inputs/callsets/t0_1_2.csv: -------------------------------------------------------------------------------- 1 | 0,12140,12294,C,&,,0,,,,,,,,,2,0,,,,,0,3,0,0,0,,,0,2,0,0 2 | 1,12144,12276,C,&,,0,,,,,,,,,3,0,,,,,0,3,0,0,0,,,0,2,0,0 3 | 0,17384,17384,G,A|&,475.77,1,0,-2.096,-1.859,-0.329,0.005,31.72,5.5,8,,80,99,58,0,22,0,3,58,22,17,6,504,0,9807,678,1870,2548,0|1,17385_G_A,,2,0,1 4 | 1,17384,17384,G,T|&,3302.77,1,0,-2.074,0.555,-1.369,-0.101,29.82,2.5,3,120,120,99,0,0,0,0,3,0,120,37,6,3336,358,0,4536,958,7349,0|1,17385_G_T,,2,1,1 5 | 2,17384,17384,G,A|&,989.77,0,1.046,-2.242,-0.432,2.055,59.37,,0,76,76,99,9,31,13,23,3,40,36,0,6,1018,0,1116,1137,1224,2361,,,,2,0,1 6 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_1_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "filename": "inputs/vcfs/t0.vcf.gz" 7 | }, 8 | "HG01958" : { 9 | "row_idx" : 1, 10 | "idx_in_file": 0, 11 | "filename": "inputs/vcfs/t1.vcf.gz" 12 | }, 13 | "HG01530" : { 14 | "row_idx" : 2, 15 | "idx_in_file": 0, 16 | "filename": "inputs/vcfs/t2.vcf.gz" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_1_2_all_asa.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "filename": "inputs/vcfs/t0_asa.vcf.gz" 7 | }, 8 | "HG01958" : { 9 | "row_idx" : 1, 10 | "idx_in_file": 0, 11 | "filename": "inputs/vcfs/t1_asa.vcf.gz" 12 | }, 13 | "HG01530" : { 14 | "row_idx" : 2, 15 | "idx_in_file": 0, 16 | "filename": "inputs/vcfs/t2_asa.vcf.gz" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- 
/tests/inputs/callsets/t0_1_2_as_array.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : [ 3 | { 4 | "sample_name": "HG00141", 5 | "row_idx" : 0, 6 | "idx_in_file": 0, 7 | "filename": "inputs/vcfs/t0.vcf.gz" 8 | }, 9 | { 10 | "name": "HG01958", 11 | "row_idx" : 1, 12 | "idx_in_file": 0, 13 | "filename": "inputs/vcfs/t1.vcf.gz" 14 | }, 15 | { 16 | "callset_name": "HG01530", 17 | "row_idx" : 2, 18 | "idx_in_file": 0, 19 | "filename": "inputs/vcfs/t2.vcf.gz" 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_1_2_buffer.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "stream_name": "HG00141_stream" 7 | }, 8 | "HG01958" : { 9 | "row_idx" : 1, 10 | "idx_in_file": 0, 11 | "stream_name": "HG01958_stream" 12 | }, 13 | "HG01530" : { 14 | "row_idx" : 2, 15 | "idx_in_file": 0, 16 | "stream_name": "HG01530_stream" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_1_2_buffer_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "HG00141_stream": "inputs/vcfs/t0.vcf.gz", 3 | "HG01958_stream": "inputs/vcfs/t1.vcf.gz", 4 | "HG01530_stream": "inputs/vcfs/t2.vcf.gz" 5 | } 6 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_1_2_combined.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "filename": "inputs/vcfs/t0_1_2_combined.vcf.gz" 7 | }, 8 | "HG01958" : { 9 | "row_idx" : 1, 10 | "idx_in_file": 1, 11 | "filename": "inputs/vcfs/t0_1_2_combined.vcf.gz" 12 | }, 13 | "HG01530" : { 14 | "row_idx" : 2, 15 | 
"idx_in_file": 2, 16 | "filename": "inputs/vcfs/t0_1_2_combined.vcf.gz" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_1_2_csv.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "filename": "inputs/callsets/t0_1_2.csv" 7 | }, 8 | "HG01958" : { 9 | "row_idx" : 1, 10 | "idx_in_file": 1, 11 | "filename": "inputs/callsets/t0_1_2.csv" 12 | }, 13 | "HG01530" : { 14 | "row_idx" : 2, 15 | "idx_in_file": 2, 16 | "filename": "inputs/callsets/t0_1_2.csv" 17 | } 18 | }, 19 | "unsorted_csv_files" :[ "inputs/callsets/t0_1_2.csv" ] 20 | } 21 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_haploid_triploid_1_2_3_triploid_deletion.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "filename": "inputs/vcfs/t0_haploid_triploid.vcf.gz" 6 | }, 7 | "HG01958" : { 8 | "row_idx" : 1, 9 | "idx_in_file": 0, 10 | "filename": "inputs/vcfs/t1.vcf.gz" 11 | }, 12 | "HG01530" : { 13 | "row_idx" : 2, 14 | "idx_in_file": 0, 15 | "filename": "inputs/vcfs/t2.vcf.gz" 16 | }, 17 | "NA12878" : { 18 | "row_idx" : 3, 19 | "idx_in_file": 0, 20 | "filename": "inputs/vcfs/t3_triploid_deletion.vcf.gz" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_overlapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "filename": "inputs/vcfs/t0_overlapping.vcf.gz" 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t0_with_missing_PL_SB_fields_t1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "filename": "inputs/vcfs/t0_with_missing_PL_SB_fields.vcf.gz" 6 | }, 7 | "HG01958" : { 8 | "row_idx" : 1, 9 | "filename": "inputs/vcfs/t1.vcf.gz" 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /tests/inputs/callsets/t6_7_8.json: -------------------------------------------------------------------------------- 1 | { 2 | "callsets" : { 3 | "HG00141" : { 4 | "row_idx" : 0, 5 | "idx_in_file": 0, 6 | "filename": "inputs/vcfs/t6.vcf.gz" 7 | }, 8 | "HG01958" : { 9 | "row_idx" : 1, 10 | "idx_in_file": 0, 11 | "filename": "inputs/vcfs/t7.vcf.gz" 12 | }, 13 | "HG01530" : { 14 | "row_idx" : 2, 15 | "idx_in_file": 0, 16 | "filename": "inputs/vcfs/t8.vcf.gz" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/inputs/chr1_10MB.fasta.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/chr1_10MB.fasta.gz -------------------------------------------------------------------------------- /tests/inputs/chr1_10MB.fasta.gz.fai: -------------------------------------------------------------------------------- 1 | 1 9876492 52 80 81 2 | -------------------------------------------------------------------------------- /tests/inputs/chr1_10MB.fasta.gz.gzi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/chr1_10MB.fasta.gz.gzi -------------------------------------------------------------------------------- /tests/inputs/vcfs/info_op0.vcf.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/info_op0.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/info_op0.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/info_op0.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/info_op1.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/info_op1.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/info_op1.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/info_op1.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/info_op2.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/info_op2.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/info_op2.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/info_op2.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/min_PL_spanning_deletion.vcf.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/min_PL_spanning_deletion.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/min_PL_spanning_deletion.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/min_PL_spanning_deletion.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_1_2_combined.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_1_2_combined.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_1_2_combined.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_1_2_combined.vcf.gz.tbi 
-------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_asa.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_asa.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_asa.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_asa.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_haploid_triploid.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_haploid_triploid.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_haploid_triploid.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_haploid_triploid.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_overlapping.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_overlapping.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_overlapping.vcf.gz.tbi: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_overlapping.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_with_missing_PL_SB_fields.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_with_missing_PL_SB_fields.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t0_with_missing_PL_SB_fields.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t0_with_missing_PL_SB_fields.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t1.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t1.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t1.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t1.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t1_asa.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t1_asa.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t1_asa.vcf.gz.tbi: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t1_asa.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t2.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t2.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t2.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t2.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t2_asa.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t2_asa.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t2_asa.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t2_asa.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t3_triploid_deletion.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t3_triploid_deletion.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t3_triploid_deletion.vcf.gz.tbi: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t3_triploid_deletion.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t6.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t6.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t6.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t6.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t7.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t7.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t7.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t7.vcf.gz.tbi -------------------------------------------------------------------------------- /tests/inputs/vcfs/t8.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t8.vcf.gz -------------------------------------------------------------------------------- /tests/inputs/vcfs/t8.vcf.gz.tbi: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intel-HLS/GenomicsDB/7db8520b85f22e3b4d9302121f17037c1f93b576/tests/inputs/vcfs/t8.vcf.gz.tbi -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories("include") 2 | 3 | if(NOT DISABLE_MPI) 4 | build_GenomicsDB_executable(create_tiledb_workspace) 5 | build_GenomicsDB_executable(gt_mpi_gather) 6 | build_GenomicsDB_executable(vcf2tiledb) 7 | build_GenomicsDB_executable(vcfdiff) 8 | build_GenomicsDB_executable(vcf_histogram) 9 | build_GenomicsDB_executable(consolidate_tiledb_array) 10 | endif() 11 | -------------------------------------------------------------------------------- /tools/src/consolidate_tiledb_array.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include 24 | #include "tiledb_loader.h" 25 | 26 | int main(int argc, char** argv) 27 | { 28 | if(argc < 3) 29 | { 30 | std::cerr << "Needs 2 arguments \n"; 31 | exit(-1); 32 | } 33 | VCF2TileDBLoader::consolidate_tiledb_array(argv[1], argv[2]); 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /tools/src/create_tiledb_workspace.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #include 24 | #include "tiledb_utils.h" 25 | 26 | int main(int argc, char** argv) 27 | { 28 | if(argc < 2) 29 | { 30 | std::cerr << "Needs 1 argument \n"; 31 | exit(-1); 32 | } 33 | return TileDBUtils::create_workspace(argv[1], false); 34 | } 35 | -------------------------------------------------------------------------------- /tools/src/vcf_histogram.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2016-2017 Intel Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | * the Software, and to permit persons to whom the Software is furnished to do so, 10 | * subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #include "vcf2binary.h" 24 | #include "tiledb_loader.h" 25 | #include 26 | 27 | #ifdef USE_GPERFTOOLS 28 | #include "gperftools/profiler.h" 29 | #endif 30 | 31 | int main(int argc, char** argv) 32 | { 33 | if(argc <= 1) 34 | { 35 | std::cerr << "Needs 1 arg \n"; 36 | exit(-1); 37 | } 38 | //Converter object 39 | GenomicsDBImportConfig loader_config; 40 | loader_config.read_from_file(argv[1], 0); 41 | VCF2TileDBConverter converter(loader_config, 0); 42 | converter.create_and_print_histogram(argv[1]); 43 | return 0; 44 | } 45 | --------------------------------------------------------------------------------