├── .asf.yaml ├── .clang-format ├── .clang-tidy ├── .github ├── .licenserc.yaml ├── PULL_REQUEST_TEMPLATE ├── dependabot.yml ├── labeler.yml ├── lsan-suppressions.txt └── workflows │ ├── asan_test.yml │ ├── build_and_test.yml │ ├── labeler.yml │ ├── pages.yml │ ├── publish_snapshot.yml │ └── stale.yml ├── .gitignore ├── .idea └── vcs.xml ├── .markdownlint.yaml ├── .markdownlintignore ├── .nojekyll ├── CMakeLists.txt ├── LICENSE ├── NOTICE ├── README.md ├── c++ ├── CMakeLists.txt ├── Doxyfile ├── build-support │ ├── README.md │ ├── run_clang_format.py │ └── run_clang_tidy.py ├── include │ ├── CMakeLists.txt │ └── orc │ │ ├── BloomFilter.hh │ │ ├── ColumnPrinter.hh │ │ ├── Common.hh │ │ ├── Exceptions.hh │ │ ├── Int128.hh │ │ ├── MemoryPool.hh │ │ ├── OrcFile.hh │ │ ├── Reader.hh │ │ ├── Statistics.hh │ │ ├── Type.hh │ │ ├── Vector.hh │ │ ├── Writer.hh │ │ ├── orc-config.hh.in │ │ └── sargs │ │ ├── Literal.hh │ │ ├── SearchArgument.hh │ │ └── TruthValue.hh ├── libs │ └── libhdfspp │ │ ├── imported_timestamp │ │ ├── libhdfspp.tar.gz │ │ └── pull_hdfs.sh ├── orcConfig.cmake.in ├── src │ ├── Adaptor.cc │ ├── Adaptor.hh.in │ ├── BitUnpackerAvx512.hh │ ├── BlockBuffer.cc │ ├── BlockBuffer.hh │ ├── BloomFilter.cc │ ├── BloomFilter.hh │ ├── Bpacking.hh │ ├── BpackingAvx512.cc │ ├── BpackingAvx512.hh │ ├── BpackingDefault.cc │ ├── BpackingDefault.hh │ ├── ByteRLE.cc │ ├── ByteRLE.hh │ ├── CMakeLists.txt │ ├── ColumnPrinter.cc │ ├── ColumnReader.cc │ ├── ColumnReader.hh │ ├── ColumnWriter.cc │ ├── ColumnWriter.hh │ ├── Common.cc │ ├── Compression.cc │ ├── Compression.hh │ ├── ConvertColumnReader.cc │ ├── ConvertColumnReader.hh │ ├── CpuInfoUtil.cc │ ├── CpuInfoUtil.hh │ ├── Dispatch.hh │ ├── Exceptions.cc │ ├── Int128.cc │ ├── LzoDecompressor.cc │ ├── LzoDecompressor.hh │ ├── MemoryPool.cc │ ├── Murmur3.cc │ ├── Murmur3.hh │ ├── Options.hh │ ├── OrcFile.cc │ ├── OrcHdfsFile.cc │ ├── RLE.cc │ ├── RLE.hh │ ├── RLEV2Util.cc │ ├── RLEV2Util.hh │ ├── RLEv1.cc │ ├── RLEv1.hh │ ├── 
RLEv2.hh │ ├── Reader.cc │ ├── Reader.hh │ ├── RleDecoderV2.cc │ ├── RleEncoderV2.cc │ ├── SchemaEvolution.cc │ ├── SchemaEvolution.hh │ ├── Statistics.cc │ ├── Statistics.hh │ ├── StripeStream.cc │ ├── StripeStream.hh │ ├── Timezone.cc │ ├── Timezone.hh │ ├── TypeImpl.cc │ ├── TypeImpl.hh │ ├── Utils.hh │ ├── Vector.cc │ ├── Writer.cc │ ├── io │ │ ├── Cache.cc │ │ ├── Cache.hh │ │ ├── InputStream.cc │ │ ├── InputStream.hh │ │ ├── OutputStream.cc │ │ └── OutputStream.hh │ ├── sargs │ │ ├── ExpressionTree.cc │ │ ├── ExpressionTree.hh │ │ ├── Literal.cc │ │ ├── PredicateLeaf.cc │ │ ├── PredicateLeaf.hh │ │ ├── SargsApplier.cc │ │ ├── SargsApplier.hh │ │ ├── SearchArgument.cc │ │ ├── SearchArgument.hh │ │ └── TruthValue.cc │ └── wrap │ │ ├── coded-stream-wrapper.h │ │ ├── gmock.h │ │ ├── gtest-wrapper.h │ │ ├── orc-proto-wrapper.cc │ │ ├── orc-proto-wrapper.hh │ │ ├── snappy-wrapper.h │ │ └── zero-copy-stream-wrapper.h └── test │ ├── CMakeLists.txt │ ├── CreateTestFiles.cc │ ├── MemoryInputStream.cc │ ├── MemoryInputStream.hh │ ├── MemoryOutputStream.cc │ ├── MemoryOutputStream.hh │ ├── MockStripeStreams.cc │ ├── MockStripeStreams.hh │ ├── OrcTest.hh │ ├── TestAttributes.cc │ ├── TestBlockBuffer.cc │ ├── TestBloomFilter.cc │ ├── TestBufferedOutputStream.cc │ ├── TestByteRLEEncoder.cc │ ├── TestByteRle.cc │ ├── TestCache.cc │ ├── TestColumnPrinter.cc │ ├── TestColumnReader.cc │ ├── TestColumnStatistics.cc │ ├── TestCompression.cc │ ├── TestConvertColumnReader.cc │ ├── TestDecimal.cc │ ├── TestDecompression.cc │ ├── TestDictionaryEncoding.cc │ ├── TestDriver.cc │ ├── TestInt128.cc │ ├── TestMurmur3.cc │ ├── TestPredicateLeaf.cc │ ├── TestPredicatePushdown.cc │ ├── TestRLEV2Util.cc │ ├── TestReader.cc │ ├── TestRleDecoder.cc │ ├── TestRleEncoder.cc │ ├── TestRleVectorDecoder.cc │ ├── TestSargsApplier.cc │ ├── TestSchemaEvolution.cc │ ├── TestSearchArgument.cc │ ├── TestStripeIndexStatistics.cc │ ├── TestTimestampStatistics.cc │ ├── TestTimezone.cc │ ├── TestType.cc │ ├── 
TestWriter.cc │ └── test-orc.suppress ├── cmake_modules ├── CheckFormat.cmake ├── CheckSourceCompiles.cmake ├── ConfigSimdLevel.cmake ├── FindCyrusSASL.cmake ├── FindGTest.cmake ├── FindLZ4.cmake ├── FindProtobuf.cmake ├── FindSnappy.cmake ├── FindZLIB.cmake ├── FindZSTD.cmake └── ThirdpartyToolchain.cmake ├── conan ├── all │ ├── ConanThirdpartyToolchain.cmake │ ├── conandata.yml │ ├── conanfile.py │ └── test_package │ │ ├── CMakeLists.txt │ │ ├── conanfile.py │ │ └── test_package.cpp └── config.yml ├── dev └── merge_orc_pr.py ├── docker ├── .gitignore ├── README.md ├── amazonlinux23 │ └── Dockerfile ├── debian11 │ └── Dockerfile ├── debian12 │ └── Dockerfile ├── fedora37 │ └── Dockerfile ├── oraclelinux9 │ └── Dockerfile ├── os-list.txt ├── reinit.sh ├── run-all.sh ├── run-one.sh ├── ubuntu22 │ └── Dockerfile └── ubuntu24 │ └── Dockerfile ├── examples ├── TestCSVFileImport.test10rows.csv ├── TestCSVFileImport.testTimezoneOption.csv ├── TestOrcFile.columnProjection.orc ├── TestOrcFile.emptyFile.orc ├── TestOrcFile.metaData.orc ├── TestOrcFile.test1.orc ├── TestOrcFile.testDate1900.orc ├── TestOrcFile.testDate2038.orc ├── TestOrcFile.testMemoryManagementV11.orc ├── TestOrcFile.testMemoryManagementV12.orc ├── TestOrcFile.testPredicatePushdown.orc ├── TestOrcFile.testSargSkipPickupGroupWithoutIndexCPlusPlus.orc ├── TestOrcFile.testSargSkipPickupGroupWithoutIndexJava.orc ├── TestOrcFile.testSeek.orc ├── TestOrcFile.testSnappy.orc ├── TestOrcFile.testStringAndBinaryStatistics.orc ├── TestOrcFile.testStripeLevelStats.orc ├── TestOrcFile.testTimestamp.orc ├── TestOrcFile.testUnionAndTimestamp.orc ├── TestOrcFile.testWithoutCompressionBlockSize.orc ├── TestOrcFile.testWithoutIndex.orc ├── TestStringDictionary.testRowIndex.orc ├── TestVectorOrcFile.testLz4.orc ├── TestVectorOrcFile.testLzo.orc ├── TestVectorOrcFile.testZstd.0.12.orc ├── bad_bloom_filter_1.6.0.orc ├── bad_bloom_filter_1.6.11.orc ├── complextypes_iceberg.orc ├── corrupt │ ├── 
missing_blob_stream_in_string_dict.orc │ ├── missing_length_stream_in_string_dict.orc │ ├── negative_dict_entry_lengths.orc │ └── stripe_footer_bad_column_encodings.orc ├── decimal.orc ├── decimal64_v2.orc ├── decimal64_v2_cplusplus.orc ├── demo-11-none.orc ├── demo-11-zlib.orc ├── demo-12-zlib.orc ├── encrypted │ ├── kms.keystore │ ├── sample1.orc │ └── sample2.orc ├── expected │ ├── TestOrcFile.columnProjection.jsn.gz │ ├── TestOrcFile.emptyFile.jsn.gz │ ├── TestOrcFile.metaData.jsn.gz │ ├── TestOrcFile.test1.jsn.gz │ ├── TestOrcFile.testDate1900.jsn.gz │ ├── TestOrcFile.testDate2038.jsn.gz │ ├── TestOrcFile.testMemoryManagementV11.jsn.gz │ ├── TestOrcFile.testMemoryManagementV12.jsn.gz │ ├── TestOrcFile.testPredicatePushdown.jsn.gz │ ├── TestOrcFile.testSeek.jsn.gz │ ├── TestOrcFile.testSnappy.jsn.gz │ ├── TestOrcFile.testStringAndBinaryStatistics.jsn.gz │ ├── TestOrcFile.testStripeLevelStats.jsn.gz │ ├── TestOrcFile.testTimestamp.jsn.gz │ ├── TestOrcFile.testUnionAndTimestamp.jsn.gz │ ├── TestOrcFile.testWithoutIndex.jsn.gz │ ├── TestStringDictionary.testRowIndex.jsn.gz │ ├── TestVectorOrcFile.testLz4.jsn.gz │ ├── TestVectorOrcFile.testLzo.jsn.gz │ ├── decimal.jsn.gz │ ├── demo-12-zlib.jsn.gz │ ├── nulls-at-end-snappy.jsn.gz │ ├── orc-file-11-format.jsn.gz │ ├── orc_index_int_string.jsn.gz │ ├── orc_split_elim.jsn.gz │ ├── orc_split_elim_cpp.jsn.gz │ ├── orc_split_elim_new.jsn.gz │ └── over1k_bloom.jsn.gz ├── nulls-at-end-snappy.orc ├── orc-file-11-format.orc ├── orc_index_int_string.orc ├── orc_no_format.orc ├── orc_split_elim.orc ├── orc_split_elim_cpp.orc ├── orc_split_elim_new.orc ├── over1k_bloom.orc ├── version1999.orc └── zero.orc ├── java ├── .idea │ └── vcs.xml ├── .mvn │ └── jvm.config ├── CMakeLists.txt ├── bench │ ├── .gitignore │ ├── README.md │ ├── core │ │ ├── pom.xml │ │ └── src │ │ │ ├── assembly │ │ │ └── uber.xml │ │ │ ├── java │ │ │ └── org │ │ │ │ └── apache │ │ │ │ ├── hadoop │ │ │ │ └── fs │ │ │ │ │ └── TrackingLocalFileSystem.java │ │ │ 
│ └── orc │ │ │ │ ├── bench │ │ │ │ └── core │ │ │ │ │ ├── BenchmarkOptions.java │ │ │ │ │ ├── CompressionKind.java │ │ │ │ │ ├── Driver.java │ │ │ │ │ ├── IOCounters.java │ │ │ │ │ ├── NullFileSystem.java │ │ │ │ │ ├── OrcBenchmark.java │ │ │ │ │ ├── RandomGenerator.java │ │ │ │ │ ├── RecordCounters.java │ │ │ │ │ ├── SalesGenerator.java │ │ │ │ │ ├── Utilities.java │ │ │ │ │ ├── convert │ │ │ │ │ ├── BatchReader.java │ │ │ │ │ ├── BatchWriter.java │ │ │ │ │ ├── GenerateVariants.java │ │ │ │ │ ├── ScanVariants.java │ │ │ │ │ ├── avro │ │ │ │ │ │ ├── AvroReader.java │ │ │ │ │ │ ├── AvroSchemaUtils.java │ │ │ │ │ │ └── AvroWriter.java │ │ │ │ │ ├── csv │ │ │ │ │ │ └── CsvReader.java │ │ │ │ │ ├── json │ │ │ │ │ │ ├── JsonReader.java │ │ │ │ │ │ └── JsonWriter.java │ │ │ │ │ ├── orc │ │ │ │ │ │ ├── OrcReader.java │ │ │ │ │ │ └── OrcWriter.java │ │ │ │ │ └── parquet │ │ │ │ │ │ ├── ParquetReader.java │ │ │ │ │ │ └── ParquetWriter.java │ │ │ │ │ ├── filter │ │ │ │ │ ├── FilterBench.java │ │ │ │ │ └── FilterBenchUtil.java │ │ │ │ │ └── impl │ │ │ │ │ ├── ChunkReadBench.java │ │ │ │ │ └── ChunkReadUtil.java │ │ │ │ └── impl │ │ │ │ └── filter │ │ │ │ ├── RowFilter.java │ │ │ │ └── RowFilterFactory.java │ │ │ ├── resources │ │ │ ├── github.schema │ │ │ ├── log4j.properties │ │ │ ├── sales.schema │ │ │ └── taxi.schema │ │ │ └── test │ │ │ └── org │ │ │ └── apache │ │ │ └── orc │ │ │ ├── bench │ │ │ └── core │ │ │ │ ├── filter │ │ │ │ └── TestFilter.java │ │ │ │ └── impl │ │ │ │ └── ChunkReadUtilTest.java │ │ │ └── impl │ │ │ └── filter │ │ │ ├── ATestFilter.java │ │ │ └── TestRowFilter.java │ ├── fetch-data.sh │ ├── hive │ │ ├── pom.xml │ │ └── src │ │ │ ├── assembly │ │ │ └── uber.xml │ │ │ └── java │ │ │ └── org │ │ │ └── apache │ │ │ ├── hadoop │ │ │ └── hive │ │ │ │ └── ql │ │ │ │ └── io │ │ │ │ └── orc │ │ │ │ └── OrcBenchmarkUtilities.java │ │ │ └── orc │ │ │ └── bench │ │ │ └── hive │ │ │ ├── ColumnProjectionBenchmark.java │ │ │ ├── DecimalBench.java │ │ │ ├── 
FullReadBenchmark.java │ │ │ ├── ORCWriterBenchMark.java │ │ │ ├── RowFilterProjectionBenchmark.java │ │ │ └── rowfilter │ │ │ ├── BooleanRowFilterBenchmark.java │ │ │ ├── DecimalRowFilterBenchmark.java │ │ │ ├── DoubleRowFilterBenchmark.java │ │ │ ├── RowFilterInputState.java │ │ │ ├── StringRowFilterBenchmark.java │ │ │ └── TimestampRowFilterBenchmark.java │ ├── pom.xml │ └── spark │ │ ├── pom.xml │ │ └── src │ │ └── java │ │ └── org │ │ └── apache │ │ └── orc │ │ └── bench │ │ └── spark │ │ ├── SparkBenchmark.java │ │ └── SparkSchema.java ├── checkstyle-suppressions.xml ├── checkstyle.xml ├── core │ ├── pom.xml │ └── src │ │ ├── java │ │ └── org │ │ │ ├── apache │ │ │ └── orc │ │ │ │ ├── BinaryColumnStatistics.java │ │ │ │ ├── BooleanColumnStatistics.java │ │ │ │ ├── CollectionColumnStatistics.java │ │ │ │ ├── ColumnStatistics.java │ │ │ │ ├── CompressionCodec.java │ │ │ │ ├── CompressionKind.java │ │ │ │ ├── DataMask.java │ │ │ │ ├── DataMaskDescription.java │ │ │ │ ├── DataReader.java │ │ │ │ ├── DateColumnStatistics.java │ │ │ │ ├── DecimalColumnStatistics.java │ │ │ │ ├── DoubleColumnStatistics.java │ │ │ │ ├── EncryptionKey.java │ │ │ │ ├── EncryptionVariant.java │ │ │ │ ├── FileFormatException.java │ │ │ │ ├── FileMetadata.java │ │ │ │ ├── InMemoryKeystore.java │ │ │ │ ├── IntegerColumnStatistics.java │ │ │ │ ├── MemoryManager.java │ │ │ │ ├── OrcConf.java │ │ │ │ ├── OrcFile.java │ │ │ │ ├── OrcFilterContext.java │ │ │ │ ├── OrcUtils.java │ │ │ │ ├── PhysicalWriter.java │ │ │ │ ├── Reader.java │ │ │ │ ├── RecordReader.java │ │ │ │ ├── StringColumnStatistics.java │ │ │ │ ├── StripeInformation.java │ │ │ │ ├── StripeStatistics.java │ │ │ │ ├── TimestampColumnStatistics.java │ │ │ │ ├── TypeDescription.java │ │ │ │ ├── TypeDescriptionPrettyPrint.java │ │ │ │ ├── UnknownFormatException.java │ │ │ │ ├── Writer.java │ │ │ │ ├── filter │ │ │ │ ├── BatchFilter.java │ │ │ │ └── PluginFilterService.java │ │ │ │ ├── impl │ │ │ │ ├── AcidStats.java │ │ │ │ ├── 
AircompressorCodec.java │ │ │ │ ├── BitFieldReader.java │ │ │ │ ├── BitFieldWriter.java │ │ │ │ ├── BrotliCodec.java │ │ │ │ ├── BufferChunk.java │ │ │ │ ├── BufferChunkList.java │ │ │ │ ├── ColumnStatisticsImpl.java │ │ │ │ ├── ConvertTreeReaderFactory.java │ │ │ │ ├── CryptoUtils.java │ │ │ │ ├── DataReaderProperties.java │ │ │ │ ├── DateUtils.java │ │ │ │ ├── Dictionary.java │ │ │ │ ├── DictionaryUtils.java │ │ │ │ ├── DirectDecompressionCodec.java │ │ │ │ ├── DynamicByteArray.java │ │ │ │ ├── DynamicIntArray.java │ │ │ │ ├── HadoopShimsFactory.java │ │ │ │ ├── IOUtils.java │ │ │ │ ├── InStream.java │ │ │ │ ├── IntegerReader.java │ │ │ │ ├── IntegerWriter.java │ │ │ │ ├── MaskDescriptionImpl.java │ │ │ │ ├── MemoryManager.java │ │ │ │ ├── MemoryManagerImpl.java │ │ │ │ ├── OrcAcidUtils.java │ │ │ │ ├── OrcCodecPool.java │ │ │ │ ├── OrcFilterContextImpl.java │ │ │ │ ├── OrcIndex.java │ │ │ │ ├── OrcTail.java │ │ │ │ ├── OutStream.java │ │ │ │ ├── ParserUtils.java │ │ │ │ ├── PhysicalFsWriter.java │ │ │ │ ├── PositionProvider.java │ │ │ │ ├── PositionRecorder.java │ │ │ │ ├── PositionedOutputStream.java │ │ │ │ ├── ReaderImpl.java │ │ │ │ ├── RecordReaderImpl.java │ │ │ │ ├── RecordReaderUtils.java │ │ │ │ ├── RedBlackTree.java │ │ │ │ ├── RunLengthByteReader.java │ │ │ │ ├── RunLengthByteWriter.java │ │ │ │ ├── RunLengthIntegerReader.java │ │ │ │ ├── RunLengthIntegerReaderV2.java │ │ │ │ ├── RunLengthIntegerWriter.java │ │ │ │ ├── RunLengthIntegerWriterV2.java │ │ │ │ ├── SchemaEvolution.java │ │ │ │ ├── SerializationUtils.java │ │ │ │ ├── SnappyCodec.java │ │ │ │ ├── StreamName.java │ │ │ │ ├── StringHashTableDictionary.java │ │ │ │ ├── StringRedBlackTree.java │ │ │ │ ├── StripeStatisticsImpl.java │ │ │ │ ├── TreeReaderFactory.java │ │ │ │ ├── TypeUtils.java │ │ │ │ ├── Utf8Utils.java │ │ │ │ ├── VisitorContextImpl.java │ │ │ │ ├── WriterImpl.java │ │ │ │ ├── WriterInternal.java │ │ │ │ ├── ZlibCodec.java │ │ │ │ ├── ZstdCodec.java │ │ │ │ ├── filter │ │ │ │ │ 
├── AndFilter.java │ │ │ │ │ ├── BatchFilterFactory.java │ │ │ │ │ ├── FilterFactory.java │ │ │ │ │ ├── IsNotNullFilter.java │ │ │ │ │ ├── IsNullFilter.java │ │ │ │ │ ├── LeafFilter.java │ │ │ │ │ ├── OrFilter.java │ │ │ │ │ ├── Selected.java │ │ │ │ │ ├── VectorFilter.java │ │ │ │ │ └── leaf │ │ │ │ │ │ ├── DecimalFilters.java │ │ │ │ │ │ ├── FloatFilters.java │ │ │ │ │ │ ├── LeafFilterFactory.java │ │ │ │ │ │ ├── LongFilters.java │ │ │ │ │ │ ├── StringFilters.java │ │ │ │ │ │ └── TimestampFilters.java │ │ │ │ ├── mask │ │ │ │ │ ├── DecimalIdentity.java │ │ │ │ │ ├── DoubleIdentity.java │ │ │ │ │ ├── ListIdentity.java │ │ │ │ │ ├── LongIdentity.java │ │ │ │ │ ├── MapIdentity.java │ │ │ │ │ ├── MaskFactory.java │ │ │ │ │ ├── MaskProvider.java │ │ │ │ │ ├── NullifyMask.java │ │ │ │ │ ├── RedactMaskFactory.java │ │ │ │ │ ├── SHA256MaskFactory.java │ │ │ │ │ ├── StructIdentity.java │ │ │ │ │ ├── TimestampIdentity.java │ │ │ │ │ └── UnionIdentity.java │ │ │ │ ├── reader │ │ │ │ │ ├── ReaderEncryption.java │ │ │ │ │ ├── ReaderEncryptionKey.java │ │ │ │ │ ├── ReaderEncryptionVariant.java │ │ │ │ │ ├── StripePlanner.java │ │ │ │ │ └── tree │ │ │ │ │ │ ├── BatchReader.java │ │ │ │ │ │ ├── PrimitiveBatchReader.java │ │ │ │ │ │ ├── StructBatchReader.java │ │ │ │ │ │ └── TypeReader.java │ │ │ │ └── writer │ │ │ │ │ ├── BinaryTreeWriter.java │ │ │ │ │ ├── BooleanTreeWriter.java │ │ │ │ │ ├── ByteTreeWriter.java │ │ │ │ │ ├── CharTreeWriter.java │ │ │ │ │ ├── DateTreeWriter.java │ │ │ │ │ ├── Decimal64TreeWriter.java │ │ │ │ │ ├── DecimalTreeWriter.java │ │ │ │ │ ├── DoubleTreeWriter.java │ │ │ │ │ ├── EncryptionTreeWriter.java │ │ │ │ │ ├── FloatTreeWriter.java │ │ │ │ │ ├── IntegerTreeWriter.java │ │ │ │ │ ├── ListTreeWriter.java │ │ │ │ │ ├── MapTreeWriter.java │ │ │ │ │ ├── StreamOptions.java │ │ │ │ │ ├── StringBaseTreeWriter.java │ │ │ │ │ ├── StringTreeWriter.java │ │ │ │ │ ├── StructTreeWriter.java │ │ │ │ │ ├── TimestampTreeWriter.java │ │ │ │ │ ├── TreeWriter.java │ │ 
│ │ │ ├── TreeWriterBase.java │ │ │ │ │ ├── UnionTreeWriter.java │ │ │ │ │ ├── VarcharTreeWriter.java │ │ │ │ │ ├── WriterContext.java │ │ │ │ │ ├── WriterEncryptionKey.java │ │ │ │ │ ├── WriterEncryptionVariant.java │ │ │ │ │ └── WriterImplV2.java │ │ │ │ └── util │ │ │ │ ├── BloomFilter.java │ │ │ │ ├── BloomFilterIO.java │ │ │ │ ├── BloomFilterUtf8.java │ │ │ │ ├── CuckooSetBytes.java │ │ │ │ ├── Murmur3.java │ │ │ │ └── StreamWrapperFileSystem.java │ │ │ └── threeten │ │ │ └── extra │ │ │ └── chrono │ │ │ ├── HybridChronology.java │ │ │ └── HybridDate.java │ │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ ├── org.apache.orc.DataMask$Provider │ │ │ └── org.apache.orc.impl.KeyProvider$Factory │ │ └── test │ │ ├── org │ │ └── apache │ │ │ └── orc │ │ │ ├── StringDictTestingUtils.java │ │ │ ├── TestColumnStatistics.java │ │ │ ├── TestCorruptTypes.java │ │ │ ├── TestInMemoryKeystore.java │ │ │ ├── TestMinSeekSize.java │ │ │ ├── TestNewIntegerEncoding.java │ │ │ ├── TestOrcConf.java │ │ │ ├── TestOrcDSTNoTimezone.java │ │ │ ├── TestOrcFilterContext.java │ │ │ ├── TestOrcNoTimezone.java │ │ │ ├── TestOrcNullOptimization.java │ │ │ ├── TestOrcTimestampPPD.java │ │ │ ├── TestOrcTimezone1.java │ │ │ ├── TestOrcTimezone2.java │ │ │ ├── TestOrcTimezone3.java │ │ │ ├── TestOrcTimezone4.java │ │ │ ├── TestOrcTimezonePPD.java │ │ │ ├── TestOrcUtils.java │ │ │ ├── TestOrcWithLargeStripeStatistics.java │ │ │ ├── TestProlepticConversions.java │ │ │ ├── TestReader.java │ │ │ ├── TestRowFilteringComplexTypes.java │ │ │ ├── TestRowFilteringComplexTypesNulls.java │ │ │ ├── TestRowFilteringIOSkip.java │ │ │ ├── TestRowFilteringNoSkip.java │ │ │ ├── TestRowFilteringSkip.java │ │ │ ├── TestSelectedVector.java │ │ │ ├── TestStringDictionary.java │ │ │ ├── TestTypeDescription.java │ │ │ ├── TestUnicode.java │ │ │ ├── TestUnrolledBitPack.java │ │ │ ├── TestVectorOrcFile.java │ │ │ ├── impl │ │ │ ├── MockDFSDataInputStream.java │ │ │ ├── MockDataReader.java │ │ │ ├── 
MockStream.java │ │ │ ├── MockStripe.java │ │ │ ├── TestBitFieldReader.java │ │ │ ├── TestBitPack.java │ │ │ ├── TestBrotli.java │ │ │ ├── TestColumnStatisticsImpl.java │ │ │ ├── TestConvertTreeReaderFactory.java │ │ │ ├── TestCryptoUtils.java │ │ │ ├── TestDataReaderProperties.java │ │ │ ├── TestDateUtils.java │ │ │ ├── TestDynamicArray.java │ │ │ ├── TestDynamicIntArray.java │ │ │ ├── TestEncryption.java │ │ │ ├── TestInStream.java │ │ │ ├── TestIntegerCompressionReader.java │ │ │ ├── TestMemoryManager.java │ │ │ ├── TestOrcFilterContextImpl.java │ │ │ ├── TestOrcLargeStripe.java │ │ │ ├── TestOrcWideTable.java │ │ │ ├── TestOutStream.java │ │ │ ├── TestPhysicalFsWriter.java │ │ │ ├── TestPredicatePushDownBounds.java │ │ │ ├── TestReaderImpl.java │ │ │ ├── TestRecordReaderImpl.java │ │ │ ├── TestRecordReaderUtils.java │ │ │ ├── TestRunLengthByteReader.java │ │ │ ├── TestRunLengthIntegerReader.java │ │ │ ├── TestSchemaEvolution.java │ │ │ ├── TestSerializationUtils.java │ │ │ ├── TestStreamName.java │ │ │ ├── TestStringHashTableDictionary.java │ │ │ ├── TestStringRedBlackTree.java │ │ │ ├── TestWriterImpl.java │ │ │ ├── TestZlib.java │ │ │ ├── TestZstd.java │ │ │ ├── filter │ │ │ │ ├── ATestFilter.java │ │ │ │ ├── FilterUtils.java │ │ │ │ ├── IsNullFilterTest.java │ │ │ │ ├── MyFilterService.java │ │ │ │ ├── TestAndFilter.java │ │ │ │ ├── TestConvFilter.java │ │ │ │ ├── TestNotFilter.java │ │ │ │ ├── TestOrFilter.java │ │ │ │ ├── TestPluginFilterService.java │ │ │ │ ├── TestPluginFilters.java │ │ │ │ ├── TestSelected.java │ │ │ │ └── leaf │ │ │ │ │ ├── ATestLeafFilter.java │ │ │ │ │ ├── TestDecimalFilters.java │ │ │ │ │ ├── TestEquals.java │ │ │ │ │ ├── TestFilters.java │ │ │ │ │ ├── TestFloatFilters.java │ │ │ │ │ ├── TestLongFilters.java │ │ │ │ │ ├── TestStringFilters.java │ │ │ │ │ └── TestTimestampFilters.java │ │ │ ├── mask │ │ │ │ ├── TestDataMask.java │ │ │ │ ├── TestRedactMask.java │ │ │ │ ├── TestSHA256Mask.java │ │ │ │ └── TestUnmaskRange.java │ │ │ └── 
reader │ │ │ │ └── TestReaderEncryptionVariant.java │ │ │ └── util │ │ │ ├── CuckooSetBytesTest.java │ │ │ ├── TestBloomFilter.java │ │ │ ├── TestMurmur3.java │ │ │ └── TestStreamWrapperFileSystem.java │ │ └── resources │ │ ├── col.dot.orc │ │ ├── log4j.properties │ │ ├── orc-file-11-format.orc │ │ ├── orc-file-dst-no-timezone.orc │ │ ├── orc-file-no-double-statistic.orc │ │ ├── orc-file-no-timezone.orc │ │ ├── orc_corrupt_zlib.orc │ │ └── orc_split_elim.orc ├── examples │ ├── pom.xml │ └── src │ │ ├── assembly │ │ └── uber.xml │ │ └── java │ │ └── org │ │ └── apache │ │ └── orc │ │ └── examples │ │ ├── AdvancedReader.java │ │ ├── AdvancedWriter.java │ │ ├── CompressionWriter.java │ │ ├── CoreReader.java │ │ ├── CoreWriter.java │ │ ├── Driver.java │ │ ├── InMemoryEncryptionReader.java │ │ └── InMemoryEncryptionWriter.java ├── mapreduce │ ├── pom.xml │ └── src │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── orc │ │ │ ├── mapred │ │ │ ├── OrcInputFormat.java │ │ │ ├── OrcKey.java │ │ │ ├── OrcList.java │ │ │ ├── OrcMap.java │ │ │ ├── OrcMapredRecordReader.java │ │ │ ├── OrcMapredRecordWriter.java │ │ │ ├── OrcOutputFormat.java │ │ │ ├── OrcStruct.java │ │ │ ├── OrcTimestamp.java │ │ │ ├── OrcUnion.java │ │ │ ├── OrcValue.java │ │ │ └── package-info.java │ │ │ └── mapreduce │ │ │ ├── OrcInputFormat.java │ │ │ ├── OrcMapreduceRecordReader.java │ │ │ ├── OrcMapreduceRecordWriter.java │ │ │ └── OrcOutputFormat.java │ │ └── test │ │ ├── org │ │ └── apache │ │ │ └── orc │ │ │ ├── mapred │ │ │ ├── TestMapRedFiltering.java │ │ │ ├── TestOrcFileEvolution.java │ │ │ ├── TestOrcList.java │ │ │ ├── TestOrcMap.java │ │ │ ├── TestOrcOutputFormat.java │ │ │ ├── TestOrcRecordWriter.java │ │ │ ├── TestOrcStruct.java │ │ │ ├── TestOrcTimestamp.java │ │ │ └── TestOrcUnion.java │ │ │ └── mapreduce │ │ │ ├── FilterTestUtil.java │ │ │ ├── TestMapReduceFiltering.java │ │ │ ├── TestMapreduceOrcOutputFormat.java │ │ │ ├── TestMrUnit.java │ │ │ └── TestOrcRecordWriter.java │ │ └── 
resources │ │ ├── acid5k.orc │ │ └── log4j.properties ├── mvnw ├── pom.xml ├── shims │ ├── pom.xml │ └── src │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── orc │ │ │ ├── EncryptionAlgorithm.java │ │ │ └── impl │ │ │ ├── HadoopShims.java │ │ │ ├── HadoopShimsCurrent.java │ │ │ ├── KeyProvider.java │ │ │ ├── KeyProviderImpl.java │ │ │ ├── LocalKey.java │ │ │ ├── NullKeyProvider.java │ │ │ ├── SnappyDirectDecompressWrapper.java │ │ │ ├── ZeroCopyShims.java │ │ │ └── ZlibDirectDecompressWrapper.java │ │ └── test │ │ ├── org │ │ └── apache │ │ │ └── orc │ │ │ └── impl │ │ │ ├── TestHadoopShimsPost3_3_4.java │ │ │ └── TestHadoopShimsPre2_7.java │ │ └── resources │ │ └── log4j.properties ├── spotbugs-exclude.xml ├── spotbugs-include.xml └── tools │ ├── pom.xml │ └── src │ ├── assembly │ └── uber.xml │ ├── java │ └── org │ │ └── apache │ │ └── orc │ │ └── tools │ │ ├── CheckTool.java │ │ ├── ColumnSizes.java │ │ ├── Driver.java │ │ ├── FileDump.java │ │ ├── JsonFileDump.java │ │ ├── KeyTool.java │ │ ├── MergeFiles.java │ │ ├── PrintData.java │ │ ├── PrintVersion.java │ │ ├── RowCount.java │ │ ├── ScanData.java │ │ ├── convert │ │ ├── ConvertTool.java │ │ ├── CsvReader.java │ │ └── JsonReader.java │ │ └── json │ │ ├── BooleanType.java │ │ ├── HiveType.java │ │ ├── JsonSchemaFinder.java │ │ ├── JsonShredder.java │ │ ├── ListType.java │ │ ├── MapType.java │ │ ├── NullType.java │ │ ├── NumericType.java │ │ ├── StringType.java │ │ ├── StructType.java │ │ └── UnionType.java │ └── test │ ├── org │ └── apache │ │ └── orc │ │ ├── impl │ │ ├── FakeKeyProvider.java │ │ ├── TestHadoopKeyProvider.java │ │ └── TestRLEv2.java │ │ └── tools │ │ ├── TestCheckTool.java │ │ ├── TestColumnSizes.java │ │ ├── TestFileDump.java │ │ ├── TestJsonFileDump.java │ │ ├── TestMergeFiles.java │ │ ├── TestRowCount.java │ │ ├── TestScanData.java │ │ ├── convert │ │ ├── TestConvert.java │ │ ├── TestConvertORC.java │ │ ├── TestCsvReader.java │ │ └── TestJsonReader.java │ │ └── json │ │ └── 
TestJsonSchemaFinder.java │ └── resources │ ├── META-INF │ └── services │ │ └── org.apache.hadoop.crypto.key.KeyProviderFactory │ ├── log4j.properties │ ├── orc-file-dump-bloomfilter.out │ ├── orc-file-dump-bloomfilter2.out │ ├── orc-file-dump-column-type.out │ ├── orc-file-dump-dictionary-threshold.out │ ├── orc-file-dump-nan-and-infinite.json │ ├── orc-file-dump.json │ ├── orc-file-dump.out │ └── orc-file-has-null.out ├── site ├── .dockerignore ├── .gitignore ├── .htaccess ├── Dockerfile ├── Gemfile ├── README.md ├── _config.yml ├── _data │ ├── docs.yml │ └── releases.yml ├── _docs │ ├── acid.md │ ├── adopters.md │ ├── building.md │ ├── core-cpp.md │ ├── core-java-config.md │ ├── core-java.md │ ├── cpp-tools.md │ ├── dask.md │ ├── hive-config.md │ ├── hive-ddl.md │ ├── index.md │ ├── indexes.md │ ├── java-tools.md │ ├── mapred.md │ ├── mapreduce.md │ ├── pyarrow.md │ ├── spark-config.md │ ├── spark-ddl.md │ └── types.md ├── _includes │ ├── anchor_links.html │ ├── docs_contents.html │ ├── docs_contents_mobile.html │ ├── docs_option.html │ ├── docs_ul.html │ ├── footer.html │ ├── header.html │ ├── known_issues.md │ ├── news_contents.html │ ├── news_contents_mobile.html │ ├── news_item.html │ ├── orc_1.1.md │ ├── orc_1.2.md │ ├── orc_1.3.md │ ├── orc_1.4.md │ ├── orc_1.5.md │ ├── orc_1.6.md │ ├── orc_1.7.md │ ├── orc_1.8.md │ ├── orc_1.9.md │ ├── orc_2.0.md │ ├── orc_2.1.md │ ├── primary-nav-items.html │ ├── release_description.md │ ├── release_table.html │ ├── section_nav.html │ └── top.html ├── _layouts │ ├── default.html │ ├── docs.html │ ├── news.html │ ├── news_item.html │ └── page.html ├── _posts │ ├── 2015-04-22-orc-as-tlp.md │ ├── 2015-05-11-new-committers.md │ ├── 2015-06-26-new-logo.md │ ├── 2015-11-19-aliaksei-on-pmc.md │ ├── 2016-01-25-ORC-1.0.0.md │ ├── 2016-06-10-ORC-1.1.0.md │ ├── 2016-06-13-ORC-1.1.1.md │ ├── 2016-06-28-file-format-talk.md │ ├── 2016-07-08-ORC-1.1.2.md │ ├── 2016-08-25-ORC-1.2.0.md │ ├── 2016-10-05-ORC-1.2.1.md │ ├── 
2016-12-01-ORC-1.2.2.md │ ├── 2016-12-12-ORC-1.2.3.md │ ├── 2016-12-15-new-committers.md │ ├── 2017-01-04-gopal-on-pmc.md │ ├── 2017-01-23-ORC-1.3.0.md │ ├── 2017-02-03-ORC-1.3.1.md │ ├── 2017-02-13-ORC-1.3.2.md │ ├── 2017-02-21-ORC-1.3.3.md │ ├── 2017-05-08-ORC-1.4.0.md │ ├── 2017-05-16-new-committer.md │ ├── 2017-09-06-new-pmc.md │ ├── 2017-10-16-ORC-1.3.4.md │ ├── 2017-10-16-ORC-1.4.1.md │ ├── 2018-01-23-ORC-1.4.2.md │ ├── 2018-02-09-ORC-1.4.3.md │ ├── 2018-03-27-add-xiening-and-gang.md │ ├── 2018-05-14-ORC-1.4.4.md │ ├── 2018-05-14-ORC-1.5.0.md │ ├── 2018-05-25-ORC-1.5.1.md │ ├── 2018-06-29-ORC-1.5.2.md │ ├── 2018-09-25-ORC-1.5.3.md │ ├── 2018-12-21-ORC-1.5.4.md │ ├── 2019-01-10-add-dongjoon.md │ ├── 2019-01-11-gang-pmc.md │ ├── 2019-03-14-ORC-1.5.5.md │ ├── 2019-06-10-renat-and-sandeep.md │ ├── 2019-06-27-ORC-1.5.6.md │ ├── 2019-09-03-ORC-1.6.0.md │ ├── 2019-10-26-ORC-1.5.7.md │ ├── 2019-10-26-ORC-1.6.1.md │ ├── 2019-11-24-ORC-1.5.8.md │ ├── 2019-11-24-ORC-1.6.2.md │ ├── 2019-12-09-ORC-1.4.5.md │ ├── 2019-12-09-dongjoon-pmc.md │ ├── 2020-01-30-ORC-1.5.9.md │ ├── 2020-04-26-ORC-1.5.10.md │ ├── 2020-04-26-ORC-1.6.3.md │ ├── 2020-09-14-ORC-1.5.11.md │ ├── 2020-09-14-ORC-1.6.4.md │ ├── 2020-09-30-ORC-1.5.12.md │ ├── 2020-10-01-ORC-1.6.5.md │ ├── 2020-11-16-add-panagiotis.md │ ├── 2020-12-10-ORC-1.6.6.md │ ├── 2021-01-22-ORC-1.6.7.md │ ├── 2021-02-08-panagiotis-pmc.md │ ├── 2021-04-13-add-william.md │ ├── 2021-05-21-ORC-1.6.8.md │ ├── 2021-07-02-ORC-1.6.9.md │ ├── 2021-08-10-ORC-1.6.10.md │ ├── 2021-09-15-ORC-1.5.13.md │ ├── 2021-09-15-ORC-1.6.11.md │ ├── 2021-09-15-ORC-1.7.0.md │ ├── 2021-10-02-william-pmc.md │ ├── 2021-11-07-ORC-1.6.12.md │ ├── 2021-11-07-ORC-1.7.1.md │ ├── 2021-11-23-add-yiqun.md │ ├── 2021-12-20-ORC-1.7.2.md │ ├── 2022-01-20-ORC-1.6.13.md │ ├── 2022-02-09-ORC-1.7.3.md │ ├── 2022-03-05-add-quanlong.md │ ├── 2022-04-14-ORC-1.6.14.md │ ├── 2022-04-15-ORC-1.7.4.md │ ├── 2022-05-08-yiqun-pmc.md │ ├── 2022-06-05-add-pavan.md │ ├── 
2022-06-16-ORC-1.7.5.md │ ├── 2022-08-17-ORC-1.7.6.md │ ├── 2022-09-03-ORC-1.8.0.md │ ├── 2022-09-12-william-chair.md │ ├── 2022-11-17-ORC-1.7.7.md │ ├── 2022-12-02-ORC-1.8.1.md │ ├── 2023-01-13-ORC-1.8.2.md │ ├── 2023-01-21-ORC-1.7.8.md │ ├── 2023-02-13-add-xin.md │ ├── 2023-03-15-ORC-1.8.3.md │ ├── 2023-05-07-ORC-1.7.9.md │ ├── 2023-06-14-ORC-1.8.4.md │ ├── 2023-06-28-ORC-1.9.0.md │ ├── 2023-08-16-ORC-1.9.1.md │ ├── 2023-09-05-ORC-1.8.5.md │ ├── 2023-11-10-ORC-1.7.10.md │ ├── 2023-11-10-ORC-1.8.6.md │ ├── 2023-11-10-ORC-1.9.2.md │ ├── 2024-01-13-add-deshan.md │ ├── 2024-03-08-ORC-2.0.0.md │ ├── 2024-03-20-ORC-1.9.3.md │ ├── 2024-04-14-ORC-1.8.7.md │ ├── 2024-05-13-add-shaoyun-and-yuanping.md │ ├── 2024-05-14-ORC-2.0.1.md │ ├── 2024-07-16-ORC-1.9.4.md │ ├── 2024-08-15-ORC-2.0.2.md │ ├── 2024-09-13-ORC-1.7.11.md │ ├── 2024-11-11-ORC-1.8.8.md │ ├── 2024-11-14-ORC-1.9.5.md │ ├── 2024-11-14-ORC-2.0.3.md │ ├── 2025-01-09-ORC-2.1.0.md │ ├── 2025-03-06-ORC-2.1.1.md │ ├── 2025-03-20-ORC-2.0.4.md │ ├── 2025-03-23-shaoyun-pmc.md │ ├── 2025-05-06-ORC-1.8.9.md │ ├── 2025-05-06-ORC-1.9.6.md │ ├── 2025-05-06-ORC-2.0.5.md │ └── 2025-05-06-ORC-2.1.2.md ├── _sass │ ├── _font-awesome.scss │ ├── _gridism.scss │ ├── _mixins.scss │ ├── _normalize.scss │ └── _pygments.scss ├── css │ └── screen.scss ├── develop │ ├── bylaws.md │ ├── coding.md │ ├── committers.md │ ├── design │ │ ├── index.md │ │ ├── io.md │ │ └── lazy_filter.md │ ├── index.md │ └── make-release.md ├── doap_orc.rdf ├── favicon.ico ├── fonts │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.svg │ ├── fontawesome-webfont.ttf │ └── fontawesome-webfont.woff ├── help │ └── index.md ├── img │ ├── BloomFilter.png │ ├── CompressionStream.png │ ├── Direct.png │ ├── OrcFileLayout.png │ ├── TreeWriters.png │ ├── asf_logo.png │ ├── logo.png │ └── seekvsread.png ├── index.html ├── js │ ├── html5shiv.min.js │ └── respond.min.js ├── news │ ├── index.html │ └── releases │ │ └── index.html ├── releases │ ├── index.md │ └── 
releases.md ├── security │ ├── CVE-2018-8015.md │ ├── CVE-2025-47436.md │ └── index.md ├── specification │ ├── ORCv0.md │ ├── ORCv1.md │ ├── ORCv2.md │ └── index.md └── talks │ └── index.md └── tools ├── CMakeLists.txt ├── src ├── CMakeLists.txt ├── CSVFileImport.cc ├── FileContents.cc ├── FileMemory.cc ├── FileMetadata.cc ├── FileScan.cc ├── FileStatistics.cc ├── TimezoneDump.cc ├── ToolsHelper.cc └── ToolsHelper.hh └── test ├── CMakeLists.txt ├── TestCSVFileImport.cc ├── TestFileContents.cc ├── TestFileMetadata.cc ├── TestFileScan.cc ├── TestFileStatistics.cc ├── TestMatch.cc ├── ToolTest.cc ├── ToolTest.hh ├── gzip.cc └── gzip.hh /.asf.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | # https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features 17 | --- 18 | github: 19 | description: "Apache ORC - the smallest, fastest columnar storage for Hadoop workloads" 20 | homepage: https://orc.apache.org/ 21 | features: 22 | issues: true 23 | enabled_merge_buttons: 24 | merge: false 25 | squash: true 26 | rebase: true 27 | ghp_branch: main 28 | ghp_path: /site 29 | labels: 30 | - apache 31 | - orc 32 | - java 33 | - cpp 34 | - big-data 35 | protected_tags: 36 | - "rel/*" 37 | - "v*.*.*" 38 | notifications: 39 | pullrequests: issues@orc.apache.org 40 | issues: issues@orc.apache.org 41 | commits: commits@orc.apache.org 42 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | --- 18 | Language: Cpp 19 | BasedOnStyle: Google 20 | ColumnLimit: 100 21 | IndentWidth: 2 22 | NamespaceIndentation: All 23 | UseTab: Never 24 | AllowShortFunctionsOnASingleLine: Empty 25 | DerivePointerAlignment: false 26 | IncludeBlocks: Preserve 27 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | Checks: "-*, 19 | readability-identifier-naming, 20 | " 21 | 22 | CheckOptions: 23 | [ 24 | { key: readability-identifier-naming.ParameterCase, value: "camelBack" }, 25 | { key: readability-identifier-naming.PrivateMemberCase, value: "camelBack"}, 26 | { key: readability-identifier-naming.PrivateMemberSuffix, value: "_" }, 27 | { key: readability-identifier-naming.ProtectedMemberSuffix, value: "" }, 28 | { key: readability-identifier-naming.PublicMemberSuffix, value: "" }, 29 | { key: readability-identifier-naming.ParameterIgnoredRegexp, value: "^[a-zA-Z]$" }, 30 | ] 31 | 32 | WarningsAsErrors: '' 33 | HeaderFilterRegex: '(orc/c\+\+/|orc/tools)' 34 | FormatStyle: none -------------------------------------------------------------------------------- /.github/.licenserc.yaml: -------------------------------------------------------------------------------- 1 | header: 2 | license: 3 | spdx-id: Apache-2.0 4 | copyright-owner: Apache Software Foundation 5 | 6 | paths: 7 | - '**' 8 | 9 | paths-ignore: 10 | - '**/*.md' 11 | - '**/.gitignore' 12 | - '**/Doxyfile' 13 | - '.github/**' 14 | - 'dev/**' 15 | - 'docker/**' 16 | - 'examples/**' 17 | - 'java/**' 18 | - 'site/**' 19 | - 'c++/libs/**' 20 | - 'c++/test/test-orc.suppress' 21 | - 'LICENSE' 22 | - 'NOTICE' 23 | - '.clang-format' 24 | - '.asf.yaml' 25 | - '.nojekyll' 26 | 27 | comment: on-failure 28 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | version: 2 17 | updates: 18 | - package-ecosystem: "maven" 19 | directory: "/java" 20 | schedule: 21 | interval: "weekly" 22 | ignore: 23 | # Pin gson to 2.9.0 because of Hive 24 | - dependency-name: "com.google.code.gson:gson" 25 | versions: "[2.9.1,)" 26 | # Pin jodd-core to 3.5.2 27 | - dependency-name: "org.jodd:jodd-core" 28 | versions: "[3.5.3,)" 29 | # Pin annotations to 17.0.0 30 | - dependency-name: "org.jetbrains.annotations" 31 | versions: "[17.0.1,)" 32 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | # 19 | 20 | INFRA: 21 | - ".github/**/*" 22 | - "appveyor.yml" 23 | - ".asf.yaml" 24 | - ".gitignore" 25 | - "dev/**/*" 26 | - "docker/**/*" 27 | - ".clang-format" 28 | BUILD: 29 | - "CMakeLists.txt" 30 | - "cmake_modules/**/*" 31 | - "proto/**/*" 32 | - "**/*pom.xml" 33 | - "conan/**/*" 34 | DOCS: 35 | - "site/**/*" 36 | - "**/README.md" 37 | EXAMPLES: 38 | - "examples/**/*" 39 | JAVA: 40 | - "java/**/*" 41 | CPP: 42 | - "c++/**/*" 43 | - "tools/**/*" 44 | -------------------------------------------------------------------------------- /.github/lsan-suppressions.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | # Add specific leak suppressions here if needed 19 | # Format: 20 | # leak:SymbolName 21 | # leak:source_file.cc 22 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | name: "On pull requests" 21 | on: pull_request_target 22 | 23 | jobs: 24 | label: 25 | name: Label pull requests 26 | runs-on: ubuntu-latest 27 | permissions: 28 | contents: read 29 | pull-requests: write 30 | steps: 31 | - uses: actions/labeler@2.2.0 32 | with: 33 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 34 | sync-labels: true 35 | -------------------------------------------------------------------------------- /.github/workflows/publish_snapshot.yml: -------------------------------------------------------------------------------- 1 | name: Publish Snapshot 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | publish-snapshot: 10 | if: github.repository == 'apache/orc' 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@master 14 | 15 | - uses: actions/setup-java@v3 16 | with: 17 | distribution: zulu 18 | java-version: 17 19 | 20 | - name: Publish snapshot 21 | env: 22 | ASF_USERNAME: ${{ secrets.NEXUS_USER }} 23 | ASF_PASSWORD: ${{ secrets.NEXUS_PW }} 24 | run: | 25 | cd java 26 | echo "apache.snapshots.https$ASF_USERNAME$ASF_PASSWORD" > settings.xml 27 | ./mvnw --settings settings.xml -nsu -ntp -DskipTests deploy 28 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | target 3 | .classpath* 4 | .project 5 | .settings 6 | *~ 7 | *.iml 8 | dependency-reduced-pom.xml 9 | *.ipr 10 | *.iws 11 | .idea 12 | .DS_Store 13 | .java-version 14 | java/bench/data 15 | *.swp 16 | .cache/* 17 | -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | MD013: false 19 | -------------------------------------------------------------------------------- /.markdownlintignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | site 19 | -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/.nojekyll -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache ORC 2 | Copyright 2013 and onwards The Apache Software Foundation. 3 | 4 | This product includes software developed by The Apache Software 5 | Foundation (http://www.apache.org/). 6 | 7 | This product includes software developed by Hewlett-Packard: 8 | (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P 9 | 10 | -------------------------------------------------------------------------------- /c++/build-support/README.md: -------------------------------------------------------------------------------- 1 | # Build support 2 | 3 | The Python scripts under the folder provide capabilities for formatting code. 4 | Make sure you've installed `clang-format-13`, `clang-tidy-13` and `clang-apply-replacements-13` and cmake could find them. 5 | We enforce the version of tools because different versions of tools may generate different results. 
 6 | 7 | ## clang-format 8 | 9 | To use `run_clang_format.py`, proceed as follows: 10 | 11 | ```shell 12 | mkdir build 13 | cd build 14 | cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DORC_ENABLE_CLANG_TOOLS=1 15 | make check-format # Do checks only 16 | make format # This applies the suggested changes; take care! 17 | ``` 18 | 19 | ## clang-tidy 20 | 21 | To use `run_clang_tidy.py`, proceed as follows: 22 | 23 | ```shell 24 | mkdir build 25 | cd build 26 | cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DORC_ENABLE_CLANG_TOOLS=1 27 | make -j`nproc` # Important 28 | make check-clang-tidy # Do checks only 29 | make fix-clang-tidy # This applies the suggested changes; take care! 30 | ``` 31 | -------------------------------------------------------------------------------- /c++/include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
 17 | 18 | configure_file ( 19 | "orc/orc-config.hh.in" 20 | "orc/orc-config.hh" 21 | ) 22 | 23 | install(FILES 24 | "${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh" 25 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/orc" 26 | ) 27 | 28 | install(DIRECTORY 29 | "orc/" 30 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/orc" 31 | FILES_MATCHING PATTERN "*.hh" 32 | ) 33 | -------------------------------------------------------------------------------- /c++/include/orc/orc-config.hh.in: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #ifndef ORC_CONFIG_HH 20 | #define ORC_CONFIG_HH 21 | 22 | #define ORC_VERSION "@ORC_VERSION@" 23 | 24 | #cmakedefine ORC_CXX_HAS_CSTDINT 25 | 26 | #ifdef ORC_CXX_HAS_CSTDINT 27 | #include <cstdint> 28 | #else 29 | #include <stdint.h> 30 | #endif 31 | 32 | // The following macros should be kept for backward compatibility. 
33 | #define ORC_NOEXCEPT noexcept 34 | #define ORC_NULLPTR nullptr 35 | #define ORC_OVERRIDE override 36 | #define ORC_UNIQUE_PTR std::unique_ptr 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /c++/libs/libhdfspp/imported_timestamp: -------------------------------------------------------------------------------- 1 | Wed Aug 30 10:56:51 EDT 2017 2 | HDFS-10787 3 | commit 9587bb04a818a2661e264f619b09c15ce10ff38e 4 | Author: Anatoli Shein 5 | Date: Wed Aug 30 10:49:42 2017 -0400 6 | 7 | fixed warnings3 8 | diffs: -------------- 9 | -------------- 10 | Wed Aug 30 10:56:51 EDT 2017 11 | -------------------------------------------------------------------------------- /c++/libs/libhdfspp/libhdfspp.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/c++/libs/libhdfspp/libhdfspp.tar.gz -------------------------------------------------------------------------------- /c++/libs/libhdfspp/pull_hdfs.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$1" ]; then 2 | echo "Usage: pull_hdfs [path_to_hdfs_git_root]" 3 | exit 1; 4 | fi 5 | if [ ! -d "$1" ]; then 6 | echo "$1 is not a directory" 7 | fi 8 | if [ ! -d "$1/hadoop-hdfs-project" ]; then 9 | echo "$1 is not the root of a hadoop git checkout" 10 | fi 11 | 12 | HADOOP_ROOT=$1 13 | echo HADOOP_ROOT=$HADOOP_ROOT 14 | OUT=$(readlink -m `dirname $0`) 15 | echo OUT=$OUT 16 | TS=$OUT/imported_timestamp 17 | 18 | cd $HADOOP_ROOT && 19 | mvn -pl :hadoop-hdfs-native-client -Pnative compile -Dnative_make_args="copy_hadoop_files" 20 | (date > $TS; git rev-parse --abbrev-ref HEAD >> $TS; git log -n 1 >> $TS; \ 21 | echo "diffs: --------------" >> $TS; git diff HEAD >> $TS; \ 22 | echo " --------------" >> $TS) 23 | cd $OUT && 24 | #Delete everything except for pull_hdfs.sh and imported_timestamp 25 | find . ! 
-name 'pull_hdfs.sh' ! -name 'imported_timestamp' ! -name '.' ! -name '..' -exec rm -rf {} + && 26 | cp -R $HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp . && 27 | cp -R $HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/libhdfspp/extern libhdfspp/ && 28 | cd libhdfspp && 29 | tar -czf ../libhdfspp.tar.gz * && 30 | cd .. && 31 | rm -rf libhdfspp && 32 | date >> $TS -------------------------------------------------------------------------------- /c++/src/Bpacking.hh: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | #ifndef ORC_BPACKING_HH 20 | #define ORC_BPACKING_HH 21 | 22 | #include 23 | 24 | namespace orc { 25 | class RleDecoderV2; 26 | 27 | class BitUnpack { 28 | public: 29 | static void readLongs(RleDecoderV2* decoder, int64_t* data, uint64_t offset, uint64_t len, 30 | uint64_t fbs); 31 | }; 32 | } // namespace orc 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /c++/src/Murmur3.hh: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | #ifndef ORC_MURMUR3_HH 20 | #define ORC_MURMUR3_HH 21 | 22 | #include "orc/orc-config.hh" 23 | 24 | namespace orc { 25 | 26 | class Murmur3 { 27 | public: 28 | static const uint32_t DEFAULT_SEED = 104729; 29 | static const uint64_t NULL_HASHCODE = 2862933555777941757LL; 30 | 31 | static uint64_t hash64(const uint8_t* data, uint32_t len); 32 | 33 | private: 34 | static uint64_t fmix64(uint64_t value); 35 | static uint64_t hash64(const uint8_t* data, uint32_t len, uint32_t seed); 36 | }; 37 | 38 | } // namespace orc 39 | 40 | #endif // ORC_MURMUR3_HH 41 | -------------------------------------------------------------------------------- /c++/src/wrap/coded-stream-wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef CODED_STREAM_WRAPPER_HH 21 | #define CODED_STREAM_WRAPPER_HH 22 | 23 | #include "Adaptor.hh" 24 | 25 | DIAGNOSTIC_PUSH 26 | 27 | #ifdef __clang__ 28 | DIAGNOSTIC_IGNORE("-Wshorten-64-to-32") 29 | DIAGNOSTIC_IGNORE("-Wreserved-id-macro") 30 | #endif 31 | 32 | #if defined(__GNUC__) || defined(__clang__) 33 | DIAGNOSTIC_IGNORE("-Wconversion") 34 | #endif 35 | 36 | #include 37 | 38 | DIAGNOSTIC_POP 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /c++/src/wrap/snappy-wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef SNAPPY_WRAPPER_HH 21 | #define SNAPPY_WRAPPER_HH 22 | 23 | #include "Adaptor.hh" 24 | 25 | DIAGNOSTIC_PUSH 26 | 27 | #ifdef __clang__ 28 | DIAGNOSTIC_IGNORE("-Wreserved-id-macro") 29 | #endif 30 | 31 | #include 32 | 33 | DIAGNOSTIC_POP 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /c++/src/wrap/zero-copy-stream-wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef ZERO_COPY_STREAM_WRAPPER_HH 21 | #define ZERO_COPY_STREAM_WRAPPER_HH 22 | 23 | #include "Adaptor.hh" 24 | 25 | DIAGNOSTIC_PUSH 26 | 27 | #if defined(__GNUC__) || defined(__clang__) 28 | DIAGNOSTIC_IGNORE("-Wdeprecated") 29 | DIAGNOSTIC_IGNORE("-Wpadded") 30 | DIAGNOSTIC_IGNORE("-Wunused-parameter") 31 | #endif 32 | 33 | #ifdef __clang__ 34 | DIAGNOSTIC_IGNORE("-Wreserved-id-macro") 35 | #endif 36 | 37 | #include 38 | 39 | DIAGNOSTIC_POP 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /c++/test/MemoryInputStream.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | #include "MemoryInputStream.hh" 20 | 21 | namespace orc { 22 | MemoryInputStream::~MemoryInputStream() {} 23 | } // namespace orc 24 | -------------------------------------------------------------------------------- /c++/test/MemoryOutputStream.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #include "MemoryOutputStream.hh" 20 | 21 | namespace orc { 22 | 23 | MemoryOutputStream::~MemoryOutputStream() { 24 | delete[] data_; 25 | } 26 | 27 | void MemoryOutputStream::write(const void* buf, size_t size) { 28 | memcpy(data_ + length_, buf, size); 29 | length_ += size; 30 | } 31 | } // namespace orc 32 | -------------------------------------------------------------------------------- /c++/test/OrcTest.hh: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #ifndef ORC_TEST_HH 20 | #define ORC_TEST_HH 21 | 22 | // determine the size of an array 23 | #define ARRAY_SIZE(array) (sizeof(array) / sizeof(*array)) 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /c++/test/TestDriver.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | #include "orc/orc-config.hh" 20 | 21 | #include "wrap/gtest-wrapper.h" 22 | #include "wrap/orc-proto-wrapper.hh" 23 | 24 | #include 25 | 26 | GTEST_API_ int main(int argc, char** argv) { 27 | GOOGLE_PROTOBUF_VERIFY_VERSION; 28 | std::cout << "ORC version: " << ORC_VERSION << "\n"; 29 | testing::InitGoogleTest(&argc, argv); 30 | int result = RUN_ALL_TESTS(); 31 | google::protobuf::ShutdownProtobufLibrary(); 32 | return result; 33 | } 34 | -------------------------------------------------------------------------------- /cmake_modules/CheckSourceCompiles.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX17_FLAGS} ${WARN_FLAGS}") 19 | 20 | INCLUDE(CheckCXXSourceCompiles) 21 | 22 | CHECK_CXX_SOURCE_COMPILES(" 23 | #include 24 | int main(int, char*[]) { }" 25 | ORC_CXX_HAS_CSTDINT 26 | ) 27 | 28 | CHECK_CXX_SOURCE_COMPILES(" 29 | #include 30 | int main(void) { 31 | thread_local int s; 32 | return s; 33 | }" 34 | ORC_CXX_HAS_THREAD_LOCAL 35 | ) 36 | -------------------------------------------------------------------------------- /conan/all/conandata.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | sources: 19 | "2.0.0": 20 | url: "https://dlcdn.apache.org/orc/orc-2.0.0/orc-2.0.0.tar.gz" 21 | sha256: "9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df" 22 | "1.9.2": 23 | url: "https://dlcdn.apache.org/orc/orc-1.9.2/orc-1.9.2.tar.gz" 24 | sha256: "7f46f2c184ecefd6791f1a53fb062286818bd8710c3f08b94dd3cac365e240ee" 25 | "1.8.6": 26 | url: "https://dlcdn.apache.org/orc/orc-1.8.6/orc-1.8.6.tar.gz" 27 | sha256: "5675b18118df4dd7f86cc6ba859ed75b425ea1b7ddff805e1d671a17fd57d7f7" 28 | "1.7.10": 29 | url: "https://dlcdn.apache.org/orc/orc-1.7.10/orc-1.7.10.tar.gz" 30 | sha256: "85aef9368dc9bcdffaaf10010b66dfe053ce22f30b64854f63852248164686a3" 31 | -------------------------------------------------------------------------------- /conan/all/test_package/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | cmake_minimum_required(VERSION 3.15) 19 | project(test_package LANGUAGES CXX) 20 | 21 | find_package(orc REQUIRED CONFIG) 22 | 23 | add_executable(${PROJECT_NAME} test_package.cpp) 24 | target_link_libraries(${PROJECT_NAME} PRIVATE orc::orc) 25 | target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17) 26 | -------------------------------------------------------------------------------- /conan/all/test_package/test_package.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #include 20 | #include 21 | 22 | int main() { 23 | auto orcType = orc::Type::buildTypeFromString("struct"); 24 | std::cout << orcType->toString() << std::endl; 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /conan/config.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | versions: 19 | "2.0.0": 20 | folder: all 21 | "1.9.2": 22 | folder: all 23 | "1.8.6": 24 | folder: all 25 | "1.7.10": 26 | folder: all 27 | -------------------------------------------------------------------------------- /docker/.gitignore: -------------------------------------------------------------------------------- 1 | logs -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker Test 2 | 3 | ## Supported OSes 4 | 5 | * Debian 11 and 12 6 | * Fedora 37 7 | * Ubuntu 22 and 24 8 | * Oracle Linux 9 9 | * Amazon Linux 2023 10 | 11 | ## Pre-built Images 12 | 13 | The Apache ORC community provides a set of pre-built Docker images and uses them during testing. 14 | 15 | docker pull apache/orc-dev:ubuntu22 16 | 17 | You can find all tags here: 18 | 19 | https://hub.docker.com/r/apache/orc-dev/tags 20 | 21 | ## Test 22 | 23 | To test Apache's main branch against all of the Linux OSes: 24 | 25 | cd docker 26 | ./run-all.sh apache main 27 | 28 | Using `local` as the owner will cause the scripts to use the local repository. 
29 | 30 | The scripts are: 31 | 32 | * `run-all.sh` *owner* *branch* - test the given owner's branch on all OSes 33 | * `run-one.sh` *owner* *branch* *os* - test the owner's branch on one OS 34 | * `reinit.sh` - rebuild all of the base images without the image cache 35 | 36 | `run-all.sh`, `run-one.sh` and `reinit.sh` test on both JDK 17 and JDK 21 across OSes. 37 | 38 | A base image for each OS is built using: 39 | 40 | cd docker/$os 41 | FOR jdk21: docker build -t "orc-$os-jdk21" --build-arg jdk=21 . 42 | 43 | ## Clean up 44 | 45 | docker container prune 46 | docker image prune 47 | -------------------------------------------------------------------------------- /docker/os-list.txt: -------------------------------------------------------------------------------- 1 | debian11 2 | debian12 3 | ubuntu22 4 | ubuntu24 5 | fedora37 6 | oraclelinux9 7 | amazonlinux23 8 | -------------------------------------------------------------------------------- /docker/reinit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | start=`date` 19 | 20 | TARGET=${@:-`cat os-list.txt`} 21 | echo "Target:" $TARGET 22 | 23 | for build in $TARGET; do 24 | OS=$(echo "$build" | cut -d '_' -f1) 25 | REST=$(echo "$build" | cut -d '_' -f2- -s) 26 | if [ -z "$REST" ]; then 27 | ARGS="" 28 | else 29 | ARGS=$(echo "$REST" | sed -e 's/^/--build-arg /' -e 's/_/ --build-arg /g') 30 | fi 31 | TAG=$(echo "apache/orc-dev:$build" | sed -e 's/=/-/g') 32 | echo "Re-initialize $TAG" 33 | ( cd $OS && docker build --no-cache -t "$TAG" $ARGS . ) 34 | done 35 | echo "Start: $start" 36 | echo "End:" `date` 37 | -------------------------------------------------------------------------------- /examples/TestCSVFileImport.test10rows.csv: -------------------------------------------------------------------------------- 1 | 0,a,0.0 2 | 1,b,1.1 3 | 2,c,2.2 4 | 3,d, 5 | 4,,4.4 6 | ,f,5.5 7 | ,, 8 | 7,h,7.7 9 | 8,i,8.8 10 | 9,j,9.9 -------------------------------------------------------------------------------- /examples/TestCSVFileImport.testTimezoneOption.csv: -------------------------------------------------------------------------------- 1 | 2021-12-27 00:00:00.000 -------------------------------------------------------------------------------- /examples/TestOrcFile.columnProjection.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.columnProjection.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.emptyFile.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.emptyFile.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.metaData.orc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.metaData.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.test1.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.test1.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testDate1900.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testDate1900.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testDate2038.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testDate2038.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testMemoryManagementV11.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testMemoryManagementV11.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testMemoryManagementV12.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testMemoryManagementV12.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testPredicatePushdown.orc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testPredicatePushdown.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testSargSkipPickupGroupWithoutIndexCPlusPlus.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testSargSkipPickupGroupWithoutIndexCPlusPlus.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testSargSkipPickupGroupWithoutIndexJava.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testSargSkipPickupGroupWithoutIndexJava.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testSeek.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testSeek.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testSnappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testSnappy.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testStringAndBinaryStatistics.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testStringAndBinaryStatistics.orc -------------------------------------------------------------------------------- 
/examples/TestOrcFile.testStripeLevelStats.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testStripeLevelStats.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testTimestamp.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testTimestamp.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testUnionAndTimestamp.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testUnionAndTimestamp.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testWithoutCompressionBlockSize.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testWithoutCompressionBlockSize.orc -------------------------------------------------------------------------------- /examples/TestOrcFile.testWithoutIndex.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestOrcFile.testWithoutIndex.orc -------------------------------------------------------------------------------- /examples/TestStringDictionary.testRowIndex.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestStringDictionary.testRowIndex.orc 
-------------------------------------------------------------------------------- /examples/TestVectorOrcFile.testLz4.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestVectorOrcFile.testLz4.orc -------------------------------------------------------------------------------- /examples/TestVectorOrcFile.testLzo.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestVectorOrcFile.testLzo.orc -------------------------------------------------------------------------------- /examples/TestVectorOrcFile.testZstd.0.12.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/TestVectorOrcFile.testZstd.0.12.orc -------------------------------------------------------------------------------- /examples/bad_bloom_filter_1.6.0.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/bad_bloom_filter_1.6.0.orc -------------------------------------------------------------------------------- /examples/bad_bloom_filter_1.6.11.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/bad_bloom_filter_1.6.11.orc -------------------------------------------------------------------------------- /examples/complextypes_iceberg.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/complextypes_iceberg.orc 
-------------------------------------------------------------------------------- /examples/corrupt/missing_blob_stream_in_string_dict.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/corrupt/missing_blob_stream_in_string_dict.orc -------------------------------------------------------------------------------- /examples/corrupt/missing_length_stream_in_string_dict.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/corrupt/missing_length_stream_in_string_dict.orc -------------------------------------------------------------------------------- /examples/corrupt/negative_dict_entry_lengths.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/corrupt/negative_dict_entry_lengths.orc -------------------------------------------------------------------------------- /examples/corrupt/stripe_footer_bad_column_encodings.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/corrupt/stripe_footer_bad_column_encodings.orc -------------------------------------------------------------------------------- /examples/decimal.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/decimal.orc -------------------------------------------------------------------------------- /examples/decimal64_v2.orc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/decimal64_v2.orc -------------------------------------------------------------------------------- /examples/decimal64_v2_cplusplus.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/decimal64_v2_cplusplus.orc -------------------------------------------------------------------------------- /examples/demo-11-none.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/demo-11-none.orc -------------------------------------------------------------------------------- /examples/demo-11-zlib.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/demo-11-zlib.orc -------------------------------------------------------------------------------- /examples/demo-12-zlib.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/demo-12-zlib.orc -------------------------------------------------------------------------------- /examples/encrypted/kms.keystore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/encrypted/kms.keystore -------------------------------------------------------------------------------- /examples/encrypted/sample1.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/encrypted/sample1.orc 
-------------------------------------------------------------------------------- /examples/encrypted/sample2.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/encrypted/sample2.orc -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.columnProjection.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.columnProjection.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.emptyFile.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.emptyFile.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.metaData.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.metaData.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.test1.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.test1.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testDate1900.jsn.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testDate1900.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testDate2038.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testDate2038.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testMemoryManagementV11.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testMemoryManagementV11.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testMemoryManagementV12.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testMemoryManagementV12.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testPredicatePushdown.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testPredicatePushdown.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testSeek.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testSeek.jsn.gz 
-------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testSnappy.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testSnappy.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testStringAndBinaryStatistics.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testStringAndBinaryStatistics.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testStripeLevelStats.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testStripeLevelStats.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testTimestamp.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testTimestamp.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testUnionAndTimestamp.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testUnionAndTimestamp.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestOrcFile.testWithoutIndex.jsn.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestOrcFile.testWithoutIndex.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestStringDictionary.testRowIndex.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestStringDictionary.testRowIndex.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestVectorOrcFile.testLz4.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestVectorOrcFile.testLz4.jsn.gz -------------------------------------------------------------------------------- /examples/expected/TestVectorOrcFile.testLzo.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/TestVectorOrcFile.testLzo.jsn.gz -------------------------------------------------------------------------------- /examples/expected/decimal.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/decimal.jsn.gz -------------------------------------------------------------------------------- /examples/expected/demo-12-zlib.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/demo-12-zlib.jsn.gz 
-------------------------------------------------------------------------------- /examples/expected/nulls-at-end-snappy.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/nulls-at-end-snappy.jsn.gz -------------------------------------------------------------------------------- /examples/expected/orc-file-11-format.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/orc-file-11-format.jsn.gz -------------------------------------------------------------------------------- /examples/expected/orc_index_int_string.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/orc_index_int_string.jsn.gz -------------------------------------------------------------------------------- /examples/expected/orc_split_elim.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/orc_split_elim.jsn.gz -------------------------------------------------------------------------------- /examples/expected/orc_split_elim_cpp.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/orc_split_elim_cpp.jsn.gz -------------------------------------------------------------------------------- /examples/expected/orc_split_elim_new.jsn.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/orc_split_elim_new.jsn.gz -------------------------------------------------------------------------------- /examples/expected/over1k_bloom.jsn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/expected/over1k_bloom.jsn.gz -------------------------------------------------------------------------------- /examples/nulls-at-end-snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/nulls-at-end-snappy.orc -------------------------------------------------------------------------------- /examples/orc-file-11-format.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/orc-file-11-format.orc -------------------------------------------------------------------------------- /examples/orc_index_int_string.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/orc_index_int_string.orc -------------------------------------------------------------------------------- /examples/orc_no_format.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/orc_no_format.orc -------------------------------------------------------------------------------- /examples/orc_split_elim.orc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/orc_split_elim.orc -------------------------------------------------------------------------------- /examples/orc_split_elim_cpp.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/orc_split_elim_cpp.orc -------------------------------------------------------------------------------- /examples/orc_split_elim_new.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/orc_split_elim_new.orc -------------------------------------------------------------------------------- /examples/over1k_bloom.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/over1k_bloom.orc -------------------------------------------------------------------------------- /examples/version1999.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/version1999.orc -------------------------------------------------------------------------------- /examples/zero.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/examples/zero.orc -------------------------------------------------------------------------------- /java/.mvn/jvm.config: -------------------------------------------------------------------------------- 1 | --enable-native-access=ALL-UNNAMED 2 | -------------------------------------------------------------------------------- /java/bench/.gitignore: 
-------------------------------------------------------------------------------- 1 | .*.crc 2 | *.json.gz 3 | *.avro 4 | *.parquet 5 | *.orc 6 | -------------------------------------------------------------------------------- /java/bench/core/src/assembly/uber.xml: -------------------------------------------------------------------------------- 1 | 14 | 15 | uber 16 | 17 | jar 18 | 19 | false 20 | 21 | 22 | / 23 | true 24 | true 25 | runtime 26 | 27 | 28 | 29 | 30 | metaInf-services 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | *

10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | *

12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.orc.bench.core.convert; 20 | 21 | import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; 22 | 23 | import java.io.IOException; 24 | 25 | /** 26 | * Generic interface for reading data. 27 | */ 28 | public interface BatchReader extends AutoCloseable { 29 | 30 | boolean nextBatch(VectorizedRowBatch batch) throws IOException; 31 | 32 | @Override 33 | void close() throws IOException; 34 | } 35 | -------------------------------------------------------------------------------- /java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | *

10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | *

12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.orc.bench.core.convert; 20 | 21 | import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; 22 | 23 | import java.io.Closeable; 24 | import java.io.IOException; 25 | 26 | /** 27 | * Generic interface for writing data. 28 | */ 29 | public interface BatchWriter extends Closeable { 30 | 31 | void writeBatch(VectorizedRowBatch batch) throws IOException; 32 | 33 | @Override 34 | void close() throws IOException; 35 | } 36 | -------------------------------------------------------------------------------- /java/bench/core/src/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | log4j.rootLogger=WARN, CONSOLE 14 | 15 | # CONSOLE is set to be a ConsoleAppender using a PatternLayout 16 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 17 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 18 | log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %m%n 19 | -------------------------------------------------------------------------------- /java/bench/core/src/resources/sales.schema: -------------------------------------------------------------------------------- 1 | struct< 2 | sales_id:bigint, 3 | customer_id:bigint, 4 | col3:bigint, 5 | item_category:bigint, 6 | item_count:bigint, 7 | change_ts:timestamp, 8 | store_location:string, 9 | associate_id:string, 10 | col9:bigint, 11 | rebate_id:string, 12 | create_ts:timestamp, 13 | col13:bigint, 14 | size:string, 15 | col14:bigint, 16 | fulfilled:boolean, 17 | global_id:string, 18 | col17:string, 19 | col18:string, 20 | col19:bigint, 21 | has_rebate:boolean, 22 | col21:array< 23 | struct< 24 | sub1:bigint, 25 | sub2:string, 26 | sub3:string, 27 | sub4:bigint, 28 | sub5:bigint, 29 | sub6:string>>, 30 | vendor_id:string, 31 | country:string, 32 | backend_version:string, 33 | col41:bigint, 34 | col42:bigint, 35 | col43:bigint, 36 | col44:bigint, 37 | col45:bigint, 38 | col46:bigint, 39 | col47:bigint, 40 | col48:bigint, 41 | col49:string, 42 | col50:string, 43 | col51:bigint, 44 | col52:bigint, 45 | col53:bigint, 46 | col54:bigint, 47 | col55:string, 48 | col56:timestamp, 49 | col57:timestamp, 50 | md5:bigint, 51 | col59:bigint, 52 | col69:timestamp, 53 | col61:string, 54 | col62:string, 55 | col63:timestamp, 56 | col64:bigint> 57 | -------------------------------------------------------------------------------- /java/bench/core/src/resources/taxi.schema: -------------------------------------------------------------------------------- 1 | struct< 2 | VendorID: bigint, 3 | tpep_pickup_datetime: timestamp, 4 | tpep_dropoff_datetime: timestamp, 5 | passenger_count: bigint, 
6 | trip_distance: double, 7 | RatecodeID: bigint, 8 | store_and_fwd_flag: string, 9 | PULocationID: bigint, 10 | DOLocationID: bigint, 11 | payment_type: bigint, 12 | fare_amount: decimal(10,2), 13 | extra: decimal(10,2), 14 | mta_tax: decimal(10,2), 15 | tip_amount: decimal(10,2), 16 | tolls_amount: decimal(10,2), 17 | improvement_surcharge: decimal(10,2), 18 | total_amount: decimal(10,2), 19 | congestion_surcharge: int, 20 | airport_fee: int 21 | > 22 | -------------------------------------------------------------------------------- /java/bench/fetch-data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | set -e 18 | mkdir -p data/sources/taxi 19 | (cd data/sources/taxi; wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2015-11.parquet ) 20 | (cd data/sources/taxi; wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2015-12.parquet ) 21 | # GH Archive hourly dumps for 2015-11-01..15, hours 0-23; fetched over HTTPS (like the taxi data above) rather than cleartext HTTP. 22 | mkdir -p data/sources/github 23 | (cd data/sources/github; wget https://data.gharchive.org/2015-11-{01..15}-{0..23}.json.gz) 24 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/BinaryColumnStatistics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.orc; 19 | 20 | /** 21 | * Statistics for binary columns. 
22 | */ 23 | public interface BinaryColumnStatistics extends ColumnStatistics { 24 | long getSum(); 25 | } 26 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/BooleanColumnStatistics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.orc; 19 | 20 | /** 21 | * Statistics for boolean columns. 22 | */ 23 | public interface BooleanColumnStatistics extends ColumnStatistics { 24 | long getFalseCount(); 25 | 26 | long getTrueCount(); 27 | } 28 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/CompressionKind.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.orc; 20 | 21 | /** 22 | * An enumeration that lists the generic compression algorithms that 23 | * can be applied to ORC files. 24 | */ 25 | public enum CompressionKind { 26 | NONE, ZLIB, SNAPPY, LZO, LZ4, ZSTD, BROTLI 27 | } 28 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/DataMaskDescription.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.apache.orc; 20 | 21 | /** 22 | * Information about the DataMask used to mask the unencrypted data. 23 | */ 24 | public interface DataMaskDescription { 25 | 26 | /** 27 | * The name of the mask. 28 | * @return the name 29 | */ 30 | String getName(); 31 | 32 | /** 33 | * The parameters for the mask. 34 | * @return the array of parameters 35 | */ 36 | String[] getParameters(); 37 | 38 | /** 39 | * Get the list of columns that use this mask. 40 | * @return the list of columns 41 | */ 42 | TypeDescription[] getColumns(); 43 | } 44 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/FileFormatException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | *

10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | *

12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.orc; 19 | 20 | import java.io.IOException; 21 | 22 | /** 23 | * Thrown when an invalid file format is encountered. 24 | */ 25 | public class FileFormatException extends IOException { 26 | 27 | public FileFormatException(String errMsg) { 28 | super(errMsg); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.orc.impl; 19 | 20 | import org.apache.orc.CompressionCodec; 21 | 22 | import java.io.IOException; 23 | import java.nio.ByteBuffer; 24 | 25 | public interface DirectDecompressionCodec extends CompressionCodec { 26 | boolean isAvailable(); 27 | void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException; 28 | } 29 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/impl/MemoryManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.apache.orc.impl; 20 | 21 | import org.apache.hadoop.conf.Configuration; 22 | 23 | /** 24 | * Shim for backwards compatibility with Hive 25 | */ 26 | @Deprecated 27 | public class MemoryManager extends MemoryManagerImpl { 28 | 29 | public MemoryManager(Configuration conf) { 30 | super(conf); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/impl/PositionProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.orc.impl; 20 | 21 | /** 22 | * An interface used for seeking to a row index. 23 | */ 24 | public interface PositionProvider { 25 | long getNext(); 26 | } 27 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/impl/PositionRecorder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.orc.impl; 19 | 20 | /** 21 | * An interface for recording positions in a stream. 22 | */ 23 | public interface PositionRecorder { 24 | void addPosition(long offset); 25 | } 26 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/impl/WriterInternal.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.orc.impl; 20 | 21 | import org.apache.orc.Writer; 22 | 23 | /** 24 | * The ORC internal API to the writer. 25 | */ 26 | public interface WriterInternal extends Writer { 27 | 28 | /** 29 | * Increase the buffer size for this writer. 30 | * This function is internal only and should only be called by the 31 | * ORC file merger. 32 | * @param newSize the new buffer size. 33 | */ 34 | void increaseCompressionSize(int newSize); 35 | 36 | } 37 | -------------------------------------------------------------------------------- /java/core/src/java/org/apache/orc/impl/mask/NullifyMask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.orc.impl.mask; 19 | 20 | import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; 21 | import org.apache.orc.DataMask; 22 | 23 | /** 24 | * Masking routine that converts every value to NULL. 
25 | */ 26 | public class NullifyMask implements DataMask { 27 | // Only `masked` is written below; `original`, `start` and `length` are never read. 28 | @Override 29 | public void maskData(ColumnVector original, ColumnVector masked, 30 | int start, int length) { 31 | masked.noNulls = false; // the output vector is no longer null-free 32 | masked.isRepeating = true; // NOTE(review): presumably makes entry 0 stand for every row - confirm against ColumnVector 33 | masked.isNull[0] = true; // entry 0 is NULL 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /java/core/src/resources/META-INF/services/org.apache.orc.DataMask$Provider: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | org.apache.orc.impl.mask.MaskProvider 16 | -------------------------------------------------------------------------------- /java/core/src/resources/META-INF/services/org.apache.orc.impl.KeyProvider$Factory: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | org.apache.orc.impl.CryptoUtils$HadoopKeyProviderFactory 16 | -------------------------------------------------------------------------------- /java/core/src/test/resources/col.dot.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/core/src/test/resources/col.dot.orc -------------------------------------------------------------------------------- /java/core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | log4j.rootLogger=WARN,stdout 14 | 15 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 16 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 17 | log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n 18 | 19 | # Suppress the warnings about native io not being available 20 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 21 | -------------------------------------------------------------------------------- /java/core/src/test/resources/orc-file-11-format.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/core/src/test/resources/orc-file-11-format.orc -------------------------------------------------------------------------------- /java/core/src/test/resources/orc-file-dst-no-timezone.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/core/src/test/resources/orc-file-dst-no-timezone.orc -------------------------------------------------------------------------------- /java/core/src/test/resources/orc-file-no-double-statistic.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/core/src/test/resources/orc-file-no-double-statistic.orc -------------------------------------------------------------------------------- /java/core/src/test/resources/orc-file-no-timezone.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/core/src/test/resources/orc-file-no-timezone.orc -------------------------------------------------------------------------------- /java/core/src/test/resources/orc_corrupt_zlib.orc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/core/src/test/resources/orc_corrupt_zlib.orc -------------------------------------------------------------------------------- /java/core/src/test/resources/orc_split_elim.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/core/src/test/resources/orc_split_elim.orc -------------------------------------------------------------------------------- /java/examples/src/assembly/uber.xml: -------------------------------------------------------------------------------- 1 | 14 | 15 | uber 16 | 17 | jar 18 | 19 | false 20 | 21 | 22 | / 23 | true 24 | true 25 | runtime 26 | 27 | 28 | 29 | 30 | metaInf-services 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /java/mapreduce/src/test/resources/acid5k.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/java/mapreduce/src/test/resources/acid5k.orc -------------------------------------------------------------------------------- /java/mapreduce/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | log4j.rootLogger=WARN,stdout 14 | 15 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 16 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 17 | log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n 18 | 19 | # Suppress the warnings about native io not being available 20 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 21 | -------------------------------------------------------------------------------- /java/shims/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | log4j.rootLogger=WARN,stdout 14 | 15 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 16 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 17 | log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n 18 | 19 | # Suppress the warnings about native io not being available 20 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 21 | -------------------------------------------------------------------------------- /java/spotbugs-include.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /java/tools/src/assembly/uber.xml: -------------------------------------------------------------------------------- 1 | 14 | 15 | uber 16 | 17 | jar 18 | 19 | false 20 | 21 | 22 | / 23 | true 24 | true 25 | runtime 26 | 27 | 28 | 29 | 30 | metaInf-services 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /java/tools/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | org.apache.orc.impl.FakeKeyProvider$Factory 17 | -------------------------------------------------------------------------------- /java/tools/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | log4j.rootLogger=WARN,stdout 14 | 15 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 16 | log4j.appender.stdout.Target = System.err 17 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 18 | log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n 19 | 20 | # Suppress the warnings about native io not being available 21 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 22 | -------------------------------------------------------------------------------- /site/.dockerignore: -------------------------------------------------------------------------------- 1 | .sass-cache 2 | *~ 3 | Dockerfile 4 | Gemfile.lock 5 | target 6 | -------------------------------------------------------------------------------- /site/.gitignore: -------------------------------------------------------------------------------- 1 | .sass-cache 2 | Gemfile.lock 3 | -------------------------------------------------------------------------------- /site/.htaccess: 
-------------------------------------------------------------------------------- 1 | # .htaccess for ORC 2 | RedirectMatch Permanent ^/jira$ https://issues.apache.org/jira/browse/orc 3 | RedirectMatch Permanent ^/github$ https://github.com/apache/orc 4 | RedirectMatch Permanent ^/bugs$ https://issues.apache.org/jira/browse/orc 5 | RedirectMatch Permanent ^/issues$ https://github.com/apache/orc/issues 6 | RedirectMatch Permanent ^/milestones$ https://github.com/apache/orc/milestones 7 | RedirectMatch Permanent ^/downloads$ https://downloads.apache.org/orc/ 8 | RedirectMatch Permanent ^/src$ https://github.com/apache/orc 9 | RedirectMatch Permanent ^/web-src$ https://github.com/apache/orc/tree/main/site 10 | RewriteEngine On 11 | RewriteCond %{HTTPS} off 12 | RewriteRule ^(.*)$ https://%{HTTP_HOST}%{REQUEST_URI} [L,R=301] 13 | -------------------------------------------------------------------------------- /site/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gem 'jekyll', "~> 4.3" 3 | -------------------------------------------------------------------------------- /site/README.md: -------------------------------------------------------------------------------- 1 | # Apache ORC docs site 2 | 3 | This directory contains the code for the Apache ORC web site, 4 | [orc.apache.org](https://orc.apache.org/). The easiest way to build 5 | the site is to use Docker, which provides a standard environment. 6 | 7 | ## Setup 8 | 9 | 1. `cd site` 10 | 2. `git clone git@github.com:apache/orc.git -b asf-site target` 11 | 12 | ## Run the docker container with the preview of the site 13 | 14 | 1. `docker run -d --name orc-container -p 4000:4000 -v $PWD:/home/orc/site apache/orc-dev:site` 15 | 16 | ## Browsing 17 | 18 | Look at the site by navigating to 19 | [http://0.0.0.0:4000/](http://0.0.0.0:4000/) . 
20 | 21 | ## Pushing to site 22 | 23 | You'll copy the files from the container to the site/target directory and 24 | commit those to the asf-site branch. 25 | 26 | 1. `docker cp orc-container:/home/orc/site/target .` 27 | 2. `cd target` 28 | 3. Commit the files and push to Apache. 29 | 30 | ## Shutting down the docker container 31 | 32 | 1. `docker rm -f orc-container` 33 | -------------------------------------------------------------------------------- /site/_config.yml: -------------------------------------------------------------------------------- 1 | markdown: kramdown 2 | highlighter: rouge 3 | permalink: /news/:year/:month/:day/:title/ 4 | excerpt_separator: "" 5 | encoding: utf-8 6 | 7 | repository: https://github.com/apache/orc 8 | jira: https://issues.apache.org/jira/browse 9 | dist: https://downloads.apache.org/orc 10 | dist_mirror: https://www.apache.org/dyn/closer.cgi/orc 11 | tag_url: https://github.com/apache/orc/releases/tag/rel 12 | dist_archive: https://archive.apache.org/dist/orc 13 | destination: target 14 | exclude: [README.md, Gemfile*, Dockerfile] 15 | keep_files: [.git, .asf.yaml, api, talks/ORC-Deep-Dive-2020.pptx] 16 | highlight_color: "#67cf56" 17 | 18 | collections: 19 | docs: 20 | output: true 21 | -------------------------------------------------------------------------------- /site/_data/docs.yml: -------------------------------------------------------------------------------- 1 | - title: Overview 2 | docs: 3 | - index 4 | - adopters 5 | - types 6 | - indexes 7 | - acid 8 | 9 | - title: Installing 10 | docs: 11 | - building 12 | 13 | - title: Using in Spark 14 | docs: 15 | - spark-ddl 16 | - spark-config 17 | 18 | - title: Using in Python 19 | docs: 20 | - pyarrow 21 | - dask 22 | 23 | - title: Using in Hive 24 | docs: 25 | - hive-ddl 26 | - hive-config 27 | 28 | - title: Using in MapReduce 29 | docs: 30 | - mapred 31 | - mapreduce 32 | 33 | - title: Using ORC Core 34 | docs: 35 | - core-java 36 | - core-cpp 37 | - core-java-config 
38 | 39 | - title: Tools 40 | docs: 41 | - cpp-tools 42 | - java-tools 43 | 44 | -------------------------------------------------------------------------------- /site/_docs/dask.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: Dask 4 | permalink: /docs/dask.html 5 | --- 6 | 7 | ## How to install 8 | 9 | [Dask](https://dask.org) also supports Apache ORC. 10 | 11 | ``` 12 | pip3 install "dask[dataframe]==2024.12.1" 13 | pip3 install pandas 14 | ``` 15 | 16 | ## How to write and read an ORC file 17 | 18 | ``` 19 | In [1]: import pandas as pd 20 | 21 | In [2]: import dask.dataframe as dd 22 | 23 | In [3]: pf = pd.DataFrame(data={"col1": [1, 2, 3], "col2": ["a", "b", None]}) 24 | 25 | In [4]: dd.to_orc(dd.from_pandas(pf, npartitions=2), path="/tmp/orc") 26 | Out[4]: (None, None) 27 | 28 | In [5]: dd.read_orc(path="/tmp/orc").compute() 29 | Out[5]: 30 | col1 col2 31 | 0 1 a 32 | 1 2 b 33 | 0 3 34 | 35 | In [6]: dd.read_orc(path="/tmp/orc", columns=["col1"]).compute() 36 | Out[6]: 37 | col1 38 | 0 1 39 | 1 2 40 | 0 3 41 | ``` 42 | 43 | [10 Minutes to Dask](https://docs.dask.org/en/stable/10-minutes-to-dask.html) page 44 | provides a short overview. 45 | -------------------------------------------------------------------------------- /site/_docs/hive-ddl.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: Hive DDL 4 | permalink: /docs/hive-ddl.html 5 | --- 6 | 7 | ORC is well integrated into Hive, so storing your istari table as ORC 8 | is done by adding "STORED AS ORC". 
9 | 10 | ``` 11 | CREATE TABLE istari ( 12 | name STRING, 13 | color STRING 14 | ) STORED AS ORC; 15 | ``` 16 | 17 | To modify a table so that new partitions of the istari table are 18 | stored as ORC files: 19 | 20 | ``` 21 | ALTER TABLE istari SET FILEFORMAT ORC; 22 | ``` 23 | 24 | As of Hive 0.14, users can request an efficient merge of small ORC files 25 | together by issuing a CONCATENATE command on their table or partition. The 26 | files will be merged at the stripe level without reserialization. 27 | 28 | ``` 29 | ALTER TABLE istari [PARTITION partition_spec] CONCATENATE; 30 | ``` 31 | 32 | To get information about an ORC file, use the orcfiledump command. 33 | 34 | ``` 35 | % hive --orcfiledump 36 | ``` 37 | 38 | As of Hive 1.1, to display the data in the ORC file, use: 39 | 40 | ``` 41 | % hive --orcfiledump -d 42 | ``` -------------------------------------------------------------------------------- /site/_docs/indexes.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: Indexes 4 | permalink: /docs/indexes.html 5 | --- 6 | 7 | ORC provides three levels of indexes within each file: 8 | 9 | * file level - statistics about the values in each column across the entire 10 | file 11 | * stripe level - statistics about the values in each column for each stripe 12 | * row level - statistics about the values in each column for each set of 13 | 10,000 rows within a stripe 14 | 15 | The file and stripe level column statistics are in the file footer so 16 | that they are easy to access to determine if the rest of the file 17 | needs to be read at all. Row level indexes include both the column 18 | statistics for each row group and the position for seeking to the 19 | start of the row group. 20 | 21 | Column statistics always contain the count of values and whether there 22 | are null values present. Most other primitive types include the 23 | minimum and maximum values and for numeric types the sum. 
As of Hive 24 | 1.2, the indexes can include bloom filters, which provide a much more 25 | selective filter. 26 | 27 | The indexes at all levels are used by the reader using Search 28 | ARGuments or SARGs, which are simplified expressions that restrict the 29 | rows that are of interest. For example, if a query was looking for 30 | people older than 100 years, the SARG would be "age > 100" and 31 | only files, stripes, or row groups that had people over 100 years old 32 | would be read. -------------------------------------------------------------------------------- /site/_docs/pyarrow.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: PyArrow 4 | permalink: /docs/pyarrow.html 5 | --- 6 | 7 | ## How to install 8 | 9 | [Apache Arrow](https://arrow.apache.org) project's [PyArrow](https://pypi.org/project/pyarrow/) is the recommended package. 10 | 11 | ``` 12 | pip3 install pyarrow==18.1.0 13 | pip3 install pandas 14 | ``` 15 | 16 | ## How to write and read an ORC file 17 | 18 | ``` 19 | In [1]: import pyarrow as pa 20 | 21 | In [2]: from pyarrow import orc 22 | 23 | In [3]: orc.write_table(pa.table({"col1": [1, 2, 3], "col2": ["a", "b", None]}), "test.orc", compression="zstd") 24 | 25 | In [4]: orc.read_table("test.orc").to_pandas() 26 | Out[4]: 27 | col1 col2 28 | 0 1 a 29 | 1 2 b 30 | 2 3 None 31 | 32 | In [5]: orc.read_table("test.orc", columns=["col1"]).to_pandas() 33 | Out[5]: 34 | col1 35 | 0 1 36 | 1 2 37 | 2 3 38 | ``` 39 | 40 | [Apache Arrow ORC](https://arrow.apache.org/docs/python/orc.html) page provides more information. 
41 | -------------------------------------------------------------------------------- /site/_docs/spark-ddl.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: Spark DDL 4 | permalink: /docs/spark-ddl.html 5 | --- 6 | 7 | ORC is well integrated into Spark, so storing your `istari` table as ORC 8 | is done by adding `USING ORC`. 9 | 10 | ``` 11 | CREATE TABLE istari ( 12 | name STRING, 13 | color STRING 14 | ) USING ORC; 15 | ``` 16 | 17 | To get information about an ORC file, use the `orc-tools` command. 18 | 19 | ``` 20 | % orc-tools meta 21 | ``` 22 | 23 | To display the data in the ORC file, use: 24 | 25 | ``` 26 | % orc-tools data 27 | ``` 28 | -------------------------------------------------------------------------------- /site/_includes/anchor_links.html: -------------------------------------------------------------------------------- 1 | 34 | -------------------------------------------------------------------------------- /site/_includes/docs_contents.html: -------------------------------------------------------------------------------- 1 |

2 | 8 |
9 | -------------------------------------------------------------------------------- /site/_includes/docs_contents_mobile.html: -------------------------------------------------------------------------------- 1 |
2 | 10 |
11 | -------------------------------------------------------------------------------- /site/_includes/docs_option.html: -------------------------------------------------------------------------------- 1 | {% assign items = include.items %} 2 | 3 | {% for item in items %} 4 | {% assign item_url = item | prepend:"/docs/" | append:".html" %} 5 | 6 | {% for p in site.docs %} 7 | {% if p.url == item_url %} 8 | 9 | {% endif %} 10 | {% endfor %} 11 | {% endfor %} 12 | -------------------------------------------------------------------------------- /site/_includes/docs_ul.html: -------------------------------------------------------------------------------- 1 | {% assign items = include.items %} 2 | 3 |
    4 | {% for item in items %} 5 | {% assign item_url = item | prepend:"/docs/" | append:".html" %} 6 | 7 | {% if item_url == page.url %} 8 | {% assign c = "current" %} 9 | {% else %} 10 | {% assign c = "" %} 11 | {% endif %} 12 | 13 | {% for p in site.docs %} 14 | {% if p.url == item_url %} 15 |
  • {{ p.title }}
  • 16 | {% break %} 17 | {% endif %} 18 | {% endfor %} 19 | 20 | {% endfor %} 21 |
22 | -------------------------------------------------------------------------------- /site/_includes/footer.html: -------------------------------------------------------------------------------- 1 | 9 | -------------------------------------------------------------------------------- /site/_includes/header.html: -------------------------------------------------------------------------------- 1 |
2 | 5 |
6 |
7 |

8 | 9 | Apache ORC 10 | ORC Logo 11 | 12 |

13 |
14 | 17 |
18 |
19 | -------------------------------------------------------------------------------- /site/_includes/known_issues.md: -------------------------------------------------------------------------------- 1 | {% comment %} 2 | Generates the list of known bugs in a given release 3 | Parameters: 4 | releaseName - the name of the release (eg. 1.2.0) 5 | {% endcomment %} 6 | 7 | Known issues: 8 | 9 | {% for issue in site.data.releases[releaseName]["known-issues"] %} 10 | {% if issue[0] contains 'CVE-' %} 11 | - [{{issue[0]}}](/security/{{issue[0]}}) {{issue[1]}} 12 | {% else %} 13 | - [{{issue[0]}}]({{site.jira}}/{{issue[0]}}) {{issue[1]}} 14 | {% endif %} 15 | {% endfor %} 16 | -------------------------------------------------------------------------------- /site/_includes/news_contents.html: -------------------------------------------------------------------------------- 1 |
2 | 30 |
31 | -------------------------------------------------------------------------------- /site/_includes/news_contents_mobile.html: -------------------------------------------------------------------------------- 1 |
2 | 11 |
12 | -------------------------------------------------------------------------------- /site/_includes/news_item.html: -------------------------------------------------------------------------------- 1 |
2 |

3 | 4 | {{ post.title }} 5 | 6 |

7 | 12 | 20 |
21 | {{ post.content }} 22 |
23 |
24 | -------------------------------------------------------------------------------- /site/_includes/orc_1.1.md: -------------------------------------------------------------------------------- 1 | The major new features in ORC 1.1 are: 2 | 3 | - [ORC-1]({{site.jira}}/ORC-1) Copy the Java ORC code from Hive. 4 | - [ORC-10]({{site.jira}}/ORC-10) Fix the C++ reader to correctly read 5 | timestamps from timezones with different daylight savings rules. 6 | - [ORC-52]({{site.jira}}/ORC-52) Add mapred and mapreduce connectors. 7 | -------------------------------------------------------------------------------- /site/_includes/orc_1.2.md: -------------------------------------------------------------------------------- 1 | The new features of ORC 1.2: 2 | 3 | - [ORC-54]({{site.jira}}/ORC-54) Evolve schemas based on field name rather than index 4 | - [ORC-84]({{site.jira}}/ORC-84) Create a separate java tool module. 5 | - [ORC-77]({{site.jira}}/ORC-77) and [ORC-81]({{site.jira}}/ORC-81) Implement LZO and LZ4 compression codecs. 6 | - [ORC-92]({{site.jira}}/ORC-92) Add support for nested column id selection in C++ 7 | - [ORC-69]({{site.jira}}/ORC-69) Add batch option support in orc-scan tools. 8 | 9 | Important fixes: 10 | 11 | - [HIVE-14214]({{site.jira}}/HIVE-14214) ORC schema evolution and predicate push down do not work together. 12 | 13 | -------------------------------------------------------------------------------- /site/_includes/orc_1.3.md: -------------------------------------------------------------------------------- 1 | The new features of ORC 1.3: 2 | 3 | - [ORC-58]({{site.jira}}/ORC-58) Split C++ Reader into Reader and RowReader 4 | - [ORC-120]({{site.jira}}/ORC-120) Add backwards compatibility mode for schema evolution. 
5 | - [ORC-124]({{site.jira}}/ORC-124) Fast decimal improvements 6 | - [ORC-128]({{site.jira}}/ORC-128) Add ability to get statistics from writer 7 | 8 | 9 | -------------------------------------------------------------------------------- /site/_includes/orc_1.4.md: -------------------------------------------------------------------------------- 1 | The new features of ORC 1.4: 2 | 3 | - [ORC-72]({{site.jira}}/ORC-72) Add benchmark code for file formats. 4 | - [ORC-87]({{site.jira}}/ORC-87) Fix timestamp statistics in C++. 5 | - [ORC-150]({{site.jira}}/ORC-150) Add tool to convert from JSON. 6 | - [ORC-151]({{site.jira}}/ORC-151) Reduce the size of tools.jar. 7 | - [ORC-174]({{site.jira}}/ORC-174) Create a nohive variant of the jars. 8 | 9 | 10 | -------------------------------------------------------------------------------- /site/_includes/orc_1.5.md: -------------------------------------------------------------------------------- 1 | The new features of ORC 1.5: 2 | 3 | - [ORC-179]({{site.jira}}/ORC-179) Add ORC C++ Writer 4 | - [ORC-91]({{site.jira}}/ORC-91) Support for variable length blocks in HDFS. 5 | - [ORC-199]({{site.jira}}/ORC-199) Implement a CSV to ORC converter 6 | - [ORC-344]({{site.jira}}/ORC-344) Support for using Decimal64ColumnVector 7 | - [ORC-345]({{site.jira}}/ORC-345) Adding Decimal64StatisticsImpl 8 | - [ORC-331]({{site.jira}}/ORC-331) Support for building C++ under MSVC. 9 | - [ORC-234]({{site.jira}}/ORC-234) Support for older versions of Hadoop (>= 2.2.x) 10 | - [ORC-305]({{site.jira}}/ORC-305) Added statistics for size on disk 11 | -------------------------------------------------------------------------------- /site/_includes/orc_1.6.md: -------------------------------------------------------------------------------- 1 | The new features of ORC 1.6: 2 | 3 | - [ORC-14]({{site.jira}}/ORC-14) Add column encryption. 
4 | - [ORC-189]({{site.jira}}/ORC-189) Add timestamp with local timezone 5 | - [ORC-203]({{site.jira}}/ORC-203) Trim minimum and maximum string values 6 | - [ORC-363]({{site.jira}}/ORC-363) Add zstd support in Java 7 | - [ORC-397]({{site.jira}}/ORC-397) Support selectively disabling dictionaries 8 | - [ORC-522]({{site.jira}}/ORC-522) Add type annotations 9 | -------------------------------------------------------------------------------- /site/_includes/orc_1.7.md: -------------------------------------------------------------------------------- 1 | The new features of ORC 1.7: 2 | 3 | - [ORC-377]({{site.jira}}/ORC-377) Support Snappy compression in C++ Writer 4 | - [ORC-577]({{site.jira}}/ORC-577) Support row-level filtering 5 | - [ORC-716]({{site.jira}}/ORC-716) Build and test on Java 17-EA 6 | - [ORC-731]({{site.jira}}/ORC-731) Improve `Java Tools` 7 | - [ORC-742]({{site.jira}}/ORC-742) LazyIO of non-filter columns 8 | - [ORC-751]({{site.jira}}/ORC-751) Implement Predicate Pushdown in C++ Reader 9 | - [ORC-755]({{site.jira}}/ORC-755) Introduce OrcFilterContext 10 | - [ORC-757]({{site.jira}}/ORC-757) Add Hashtable implementation for dictionary 11 | - [ORC-780]({{site.jira}}/ORC-780) Support LZ4 Compression in C++ Writer 12 | - [ORC-797]({{site.jira}}/ORC-797) Allow writers to get the stripe information 13 | - [ORC-818]({{site.jira}}/ORC-818) Build and test in Apple Silicon 14 | - [ORC-861]({{site.jira}}/ORC-861) Bump CMake minimum requirement to 2.8.12 15 | - [ORC-867]({{site.jira}}/ORC-867) Upgrade hive-storage-api to 2.8.1 16 | - [ORC-984]({{site.jira}}/ORC-984) Save the software version that wrote each ORC file 17 | -------------------------------------------------------------------------------- /site/_includes/orc_1.8.md: -------------------------------------------------------------------------------- 1 | The New Features and Notable Changes of ORC 1.8: 2 | 3 | - [ORC-450]({{site.jira}}/ORC-450) Support selecting list indices without materializing list items 4 
| - [ORC-824]({{site.jira}}/ORC-824) Add column statistics for List and Map 5 | - [ORC-1004]({{site.jira}}/ORC-1004) Java ORC writer supports the selection vector 6 | - [ORC-1075]({{site.jira}}/ORC-1075) Support reading ORC files with no column statistics 7 | - [ORC-1125]({{site.jira}}/ORC-1125) Support decoding decimals in RLE 8 | - [ORC-1136]({{site.jira}}/ORC-1136) Optimize reads by combining multiple reads without significant separation into a single read 9 | - [ORC-1138]({{site.jira}}/ORC-1138) Seek vs Read Optimization 10 | - [ORC-1172]({{site.jira}}/ORC-1172) Add row count limit config for one stripe 11 | - [ORC-1212]({{site.jira}}/ORC-1212) Upgrade protobuf-java to 3.17.3 12 | - [ORC-1220]({{site.jira}}/ORC-1220) Set min.hadoop.version to 2.7.3 13 | - [ORC-1248]({{site.jira}}/ORC-1248) Redefine Hadoop dependency for Apache ORC 1.8.0 14 | - [ORC-1256]({{site.jira}}/ORC-1256) Publish test-jar to maven central 15 | - [ORC-1260]({{site.jira}}/ORC-1260) Publish shaded-protobuf classifier artifacts -------------------------------------------------------------------------------- /site/_includes/orc_1.9.md: -------------------------------------------------------------------------------- 1 | The New Features and Notable Changes of ORC 1.9: 2 | 3 | - [ORC-961]({{site.jira}}/ORC-961): Expose metrics of the reader 4 | - [ORC-1167]({{site.jira}}/ORC-1167): Support orc.row.batch.size configuration 5 | - [ORC-1252]({{site.jira}}/ORC-1252): Expose io metrics for write operation 6 | - [ORC-1301]({{site.jira}}/ORC-1301): Enforce C++ 17 7 | - [ORC-1310]({{site.jira}}/ORC-1310): allowlist Support for plugin filter 8 | - [ORC-1356]({{site.jira}}/ORC-1356): Use Intel AVX-512 instructions to accelerate the Rle-bit-packing decode 9 | - [ORC-1385]({{site.jira}}/ORC-1385): Support schema evolution from numeric to numeric 10 | - [ORC-1386]({{site.jira}}/ORC-1386): Support schema evolution from primitive to string group/decimal/timestamp 11 | 
-------------------------------------------------------------------------------- /site/_includes/orc_2.0.md: -------------------------------------------------------------------------------- 1 | The New Features and Notable Changes of ORC 2.0: 2 | 3 | - [ORC-1547]({{site.jira}}/ORC-1547): Spin-off ORC Format 4 | - [ORC-1572]({{site.jira}}/ORC-1572): Use Apache ORC Format 1.0.0 5 | - [ORC-1507]({{site.jira}}/ORC-1507): Support Java 21 6 | - [ORC-1512]({{site.jira}}/ORC-1512): Drop Java 8/11 and make Java 17 by default 7 | - [ORC-1577]({{site.jira}}/ORC-1577): Use ZSTD as the default compression 8 | - [ORC-1251]({{site.jira}}/ORC-1251): Use Hadoop Vectored IO 9 | - [ORC-1463]({{site.jira}}/ORC-1463): Support brotli codec 10 | - [ORC-1100]({{site.jira}}/ORC-1100): Support vcpkg 11 | -------------------------------------------------------------------------------- /site/_includes/orc_2.1.md: -------------------------------------------------------------------------------- 1 | The New Features and Notable Changes of ORC 2.1: 2 | 3 | - [ORC-262]({{site.jira}}/ORC-262) [C++] Support async prefetch in Orc reader 4 | - [ORC-1388]({{site.jira}}/ORC-1388) [C++] Support schema evolution from decimal to timestamp/string group 5 | - [ORC-1389]({{site.jira}}/ORC-1389) [C++] Support schema evolution from string group to numeric/string group 6 | - [ORC-1390]({{site.jira}}/ORC-1390) [C++] Support schema evolution from string group to decimal/timestamp 7 | - [ORC-1622]({{site.jira}}/ORC-1622) [C++] Support conan packaging 8 | - [ORC-1807]({{site.jira}}/ORC-1807) [C++] Native support for vcpkg 9 | -------------------------------------------------------------------------------- /site/_includes/primary-nav-items.html: -------------------------------------------------------------------------------- 1 | 28 | -------------------------------------------------------------------------------- /site/_includes/release_description.md: 
-------------------------------------------------------------------------------- 1 | {% comment %} 2 | Generates a description of a release. 3 | Parameters: 4 | releaseName - the name of the release (eg. 1.2.0) 5 | {% endcomment %} 6 | 7 | {% assign releaseData = site.data.releases[releaseName] %} 8 | {% if releaseData["state"] == "archived" %} 9 | {% assign mirror = site.dist_archive %} 10 | {% assign direct = site.dist_archive %} 11 | {% else %} 12 | {% assign mirror = site.dist_mirror %} 13 | {% assign direct = site.dist %} 14 | {% endif %} 15 | 16 | * Released: {{ releaseData["date"] | date: "%-d %B %Y" }} 17 | * Source code: [{{ releaseData["tar"] }}]({{mirror}}/orc-{{releaseName}}/{{releaseData["tar"]}}) 18 | * [GPG Signature]({{direct}}/orc-{{releaseName}}/{{releaseData["tar"]}}.asc) 19 | signed by [{{releaseData["signed-by"]}}]({{site.dist}}/KEYS) 20 | * Git tag: [rel/release-{{releaseName}}]({{site.tag_url}}/release-{{releaseName}}) 21 | * Maven Central: [ORC {{releaseName}}](https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.orc%22%20AND%20v%3A%22{{releaseName}}%22) 22 | * SHA 256: [{{releaseData["sha256"] | truncate: 19}}]({{direct}}/orc-{{releaseName}}/{{releaseData["tar"]}}.sha256) 23 | * Fixed issues: [ORC-{{releaseName}}](https://issues.apache.org/jira/sr/jira.issueviews:searchrequest-printable/temp/SearchRequest.html?jqlQuery=project+%3D+ORC+AND+status+%3D+Closed+AND+fixVersion+%3D+%22{{releaseName}}%22&tempMax=500) 24 | 25 | -------------------------------------------------------------------------------- /site/_includes/release_table.html: -------------------------------------------------------------------------------- 1 | {% comment %} 2 | This include generates the table of releases. 3 | {% endcomment %} 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | {% for release in site.data.releases %} 16 | {% assign datestr = release[1]["date"] | date: "%Y/%m/%d" %} 17 | 18 | 19 | 20 | 21 | 24 | 25 | {% endfor %} 26 | 27 |
VersionDateStateRelease
{{ release[0] }}{{ release[1]["date"] }}{{ release[1]["state"] }} 22 | 23 | ORC-{{ release[0] }}
28 | -------------------------------------------------------------------------------- /site/_includes/top.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ page.title }} 6 | 7 | 8 | 9 | 10 | 11 | 15 | 16 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /site/_layouts/default.html: -------------------------------------------------------------------------------- 1 | {% include top.html %} 2 | 3 | 4 | {% include header.html %} 5 | 6 | {{ content }} 7 | 8 | {% include footer.html %} 9 | {% include anchor_links.html %} 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /site/_layouts/docs.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 |
6 |
7 | 8 | {% include docs_contents_mobile.html %} 9 | 10 |
11 |
12 |

{{ page.title }}

13 | {{ content }} 14 | {% include section_nav.html %} 15 |
16 |
17 | 18 | {% include docs_contents.html %} 19 | 20 |
21 | 22 |
23 |
24 | -------------------------------------------------------------------------------- /site/_layouts/news.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 |
6 |
7 | 8 | {% include news_contents_mobile.html %} 9 | 10 |
11 | {{ content }} 12 |
13 | 14 | {% include news_contents.html %} 15 | 16 |
17 | 18 |
19 |
20 | -------------------------------------------------------------------------------- /site/_layouts/news_item.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news 3 | --- 4 | 5 |
6 |

7 | {{ page.title }} 8 | 9 |

10 | 15 | 23 |
24 | {{ content }} 25 |
26 |
27 | -------------------------------------------------------------------------------- /site/_layouts/page.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 |
6 |
7 | 8 |
9 |
10 |

{{ page.title }}

11 | {{ content }} 12 |
13 |
14 | 15 |
16 | 17 |
18 |
19 | -------------------------------------------------------------------------------- /site/_posts/2015-05-11-new-committers.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds 7 committers" 4 | date: "2015-05-11 17:23:00 -0800" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | The ORC project management committee today added seven new committers 10 | for their work on ORC. Welcome all! 11 | 12 | * Gunther Hagleitner 13 | * Aliaksei Sandryhaila 14 | * Sergey Shelukhin 15 | * Gopal Vijayaraghavan 16 | * Stephen Walkauskas 17 | * Kevin Wilfong 18 | * Xuefu Zhang 19 | -------------------------------------------------------------------------------- /site/_posts/2015-06-26-new-logo.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adopts new logo" 4 | date: "2015-06-26 08:00:00 -0800" 5 | author: omalley 6 | categories: [project] 7 | --- 8 | 9 | The ORC project has adopted a new logo. We hope you like it. 10 | 11 | ![orc logo](/img/logo.png "orc logo") 12 | 13 | Other great options included a big white hand on a black shield. *smile* -------------------------------------------------------------------------------- /site/_posts/2015-11-19-aliaksei-on-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Aliaksei Sandryhaila to PMC" 4 | date: "2015-11-19 12:47:00 -0800" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | On behalf of the Apache ORC Project Management Committee (PMC), it gives 10 | me great pleasure to announce that Aliaksei Sandryhaila has joined the Apache 11 | ORC PMC. He has done a lot of good work on ORC and I'm looking forward to 12 | more. 13 | 14 | Please join me in welcoming Aliaksei to ORC PMC! 15 | 16 | Congratulations Aliaksei! 
17 | -------------------------------------------------------------------------------- /site/_posts/2016-01-25-ORC-1.0.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.0.0 Released" 4 | date: "2016-01-25 16:45:00 -0800" 5 | author: omalley 6 | version: 1.0.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.0.0. This release 11 | contains the native C++ ORC reader and some tools. 12 | 13 | {% assign releaseName = "1.0.0" %} 14 | {% include release_description.md %} 15 | 16 | The major features: 17 | 18 | - Portable pure C++ ORC reader 19 | - The C++ reader is known to work on: 20 | * CentOS and RHEL 5, 6, and 7 21 | * Debian 6 and 7 22 | * Ubuntu 12 and 14 23 | * Mac OS 10.10 and 10.11 24 | - A file-contents command that prints the contents of the file as json records. 25 | - A file-metadata command that prints the metadata of the file. 26 | - Docker files for building and testing on various Linux distributions. 27 | - Memory estimation for the reader. 28 | 29 | {% include known_issues.md %} 30 | -------------------------------------------------------------------------------- /site/_posts/2016-06-10-ORC-1.1.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.1.0 Released" 4 | date: "2016-06-10 08:00:00 -0800" 5 | author: omalley 6 | version: 1.1.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.1.0. This release 11 | contains the Java reader and writer and the native C++ ORC reader and tools. 
12 | 13 | Release Artifacts: 14 | 15 | {% assign releaseName = "1.1.0" %} 16 | {% include release_description.md %} 17 | {% include orc_1.1.md %} 18 | {% include known_issues.md %} 19 | -------------------------------------------------------------------------------- /site/_posts/2016-06-13-ORC-1.1.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.1.1 Released" 4 | date: "2016-06-13 08:00:00 -0800" 5 | author: omalley 6 | version: 1.1.1 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.1.1. This release 11 | contains the Java reader and writer and the native C++ ORC reader and tools. 12 | 13 | {% assign releaseName = "1.1.1" %} 14 | {% include release_description.md %} 15 | {% include orc_1.1.md %} 16 | {% include known_issues.md %} 17 | -------------------------------------------------------------------------------- /site/_posts/2016-06-28-file-format-talk.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "File format benchmark" 4 | date: "2016-06-28 08:00:00 -0800" 5 | author: omalley 6 | categories: [talk] 7 | --- 8 | 9 | I gave a talk at Hadoop Summit San Jose 2016 about a file format 10 | benchmark that I've contributed as [ORC-72]({{site.jira}}/ORC-72). The 11 | benchmark focuses on real data sets that are publicly available. 
The data 12 | sets represent a wide variety of use cases: 13 | 14 | * *NYC Taxi Data* - very dense data with mostly numeric types 15 | * *Github Archives* - very sparse data with a lot of complex structure 16 | * *Sales* - a real production schema from a sales table with a synthetic generator 17 | 18 | The benchmarks look at a set of three very common use cases: 19 | 20 | * *Full table scan* - read all columns and rows 21 | * *Column projection* - read some columns, but all of the rows 22 | * *Column projection and predicate push down* - read some columns and some rows 23 | 24 | You can see the slides here: 25 | 26 | [File Format Benchmarks: Avro, JSON, ORC, & Parquet](https://www.slideshare.net/oom65/file-format-benchmarks-avro-json-orc-parquet) 27 | 28 | 32 | -------------------------------------------------------------------------------- /site/_posts/2016-07-08-ORC-1.1.2.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.1.2 Released" 4 | date: "2016-07-08 15:00:00 -0800" 5 | author: omalley 6 | version: 1.1.2 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.1.2. This release 11 | contains the Java reader and writer and the native C++ ORC reader and tools. 12 | 13 | {% assign releaseName = "1.1.2" %} 14 | {% include release_description.md %} 15 | {% include orc_1.1.md %} 16 | {% include known_issues.md %} 17 | -------------------------------------------------------------------------------- /site/_posts/2016-08-25-ORC-1.2.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.2.0 Released" 4 | date: "2016-08-25 12:00:00 -0700" 5 | author: omalley 6 | version: 1.2.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.2.0. 
11 | 12 | {% assign releaseName = "1.2.0" %} 13 | {% include release_description.md %} 14 | {% include orc_1.2.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2016-10-05-ORC-1.2.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.2.1 Released" 4 | date: "2016-10-05 16:00:00 -0700" 5 | author: omalley 6 | version: 1.2.1 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.2.1. 11 | 12 | {% assign releaseName = "1.2.1" %} 13 | {% include release_description.md %} 14 | {% include orc_1.2.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2016-12-01-ORC-1.2.2.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.2.2 Released" 4 | date: "2016-12-01 16:00:00 -0700" 5 | author: omalley 6 | version: 1.2.2 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.2.2. 11 | 12 | {% assign releaseName = "1.2.2" %} 13 | {% include release_description.md %} 14 | {% include orc_1.2.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2016-12-12-ORC-1.2.3.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.2.3 Released" 4 | date: "2016-12-12 16:00:00 -0700" 5 | author: omalley 6 | version: 1.2.3 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.2.3. This release 11 | fixes some bugs in the Java schema evolution code. 
12 | 13 | {% assign releaseName = "1.2.3" %} 14 | {% include release_description.md %} 15 | {% include orc_1.2.md %} 16 | {% include known_issues.md %} 17 | -------------------------------------------------------------------------------- /site/_posts/2016-12-15-new-committers.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds new committers" 4 | date: "2016-12-15 17:23:00 -0800" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | As part of the removal of the ORC code base from Hive, the ORC PMC has 10 | offered to make any existing Hive committers into ORC committers. The new ORC 11 | committers coming from Hive are: 12 | 13 | * Aihua Xu 14 | * Ashutosh Chauhan 15 | * Carl Steinbach 16 | * Chaoyu Tang 17 | * Chinna Rao Lalam 18 | * Daniel Dai 19 | * Eugene Koifman 20 | * Ferdinand Xu 21 | * Jason Dere 22 | * Jesus Camacho Rodriguez 23 | * Jimmy Xiang 24 | * Lars Francke 25 | * Matthew McCline 26 | * Mithun Radhakrishnan 27 | * Naveen Gangam 28 | * Pengcheng Xiong 29 | * Rajesh Balamohan 30 | * Rui Li 31 | * Sergio Pena 32 | * Siddharth Seth 33 | * Vaibhav Gumashta 34 | * Wei Zheng 35 | * Yongzhi Chen 36 | -------------------------------------------------------------------------------- /site/_posts/2017-01-04-gopal-on-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Gopal Vijayaraghavan to PMC" 4 | date: "2017-01-04 10:40:00 -0800" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | On behalf of the Apache ORC Project Management Committee (PMC), it gives 10 | me great pleasure to announce that Gopal Vijayaraghavan has joined the PMC. 11 | Gopal has done an amazing job at speeding up ORC in many ways. 12 | 13 | Please join me in welcoming Gopal to the ORC PMC! 14 | 15 | Congratulations Gopal! 
16 | -------------------------------------------------------------------------------- /site/_posts/2017-01-23-ORC-1.3.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.3.0 Released" 4 | date: "2017-01-23 12:00:00 -0800" 5 | author: omalley 6 | version: 1.3.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.3.0. 11 | 12 | {% assign releaseName = "1.3.0" %} 13 | {% include release_description.md %} 14 | {% include orc_1.3.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2017-02-03-ORC-1.3.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.3.1 Released" 4 | date: "2017-02-03 12:00:00 -0800" 5 | author: omalley 6 | version: 1.3.1 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.3.1. 11 | 12 | {% assign releaseName = "1.3.1" %} 13 | {% include release_description.md %} 14 | {% include orc_1.3.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2017-02-13-ORC-1.3.2.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.3.2 Released" 4 | date: "2017-02-13 12:00:00 -0800" 5 | author: omalley 6 | version: 1.3.2 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.3.2. 
11 | 12 | {% assign releaseName = "1.3.2" %} 13 | {% include release_description.md %} 14 | {% include orc_1.3.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2017-02-21-ORC-1.3.3.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.3.3 Released" 4 | date: "2017-02-21 12:00:00 -0800" 5 | author: omalley 6 | version: 1.3.3 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.3.3. 11 | 12 | {% assign releaseName = "1.3.3" %} 13 | {% include release_description.md %} 14 | {% include orc_1.3.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2017-05-08-ORC-1.4.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.4.0 Released" 4 | date: "2017-05-08 12:00:00 -0800" 5 | author: omalley 6 | version: 1.4.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.4.0. 11 | 12 | {% assign releaseName = "1.4.0" %} 13 | {% include release_description.md %} 14 | {% include orc_1.4.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2017-05-16-new-committer.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Deepak Majeti added as committer" 4 | date: "2017-05-16 12:00:00 -0700" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Deepak Majeti as an ORC committer for the 10 | work on the C++ ORC reader including both contributions and reviews of 11 | others' patches. Thank you for your work on ORC, Deepak! 
-------------------------------------------------------------------------------- /site/_posts/2017-09-06-new-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Eugene and Deepak to PMC" 4 | date: "2017-09-06 10:11:12 -0800" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | On behalf of the Apache ORC Project Management Committee (PMC), it gives 10 | me great pleasure to announce that Eugene Koifman and Deepak Majeti have 11 | joined the PMC. Eugene has been critical to the work on ACID and Deepak has 12 | been doing great work on the C++ code base. 13 | 14 | Please join me in welcoming Eugene and Deepak to the ORC PMC! 15 | 16 | -------------------------------------------------------------------------------- /site/_posts/2017-10-16-ORC-1.3.4.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.3.4 Released" 4 | date: "2017-10-16 12:00:00 -0800" 5 | author: prasanthj 6 | version: 1.3.4 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.3.4. 11 | 12 | {% assign releaseName = "1.3.4" %} 13 | {% include release_description.md %} 14 | {% include orc_1.3.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2017-10-16-ORC-1.4.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.4.1 Released" 4 | date: "2017-10-16 12:00:00 -0800" 5 | author: prasanthj 6 | version: 1.4.1 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.4.1. 
11 | 12 | {% assign releaseName = "1.4.1" %} 13 | {% include release_description.md %} 14 | {% include orc_1.4.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-01-23-ORC-1.4.2.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.4.2 Released" 4 | date: "2018-01-23 08:00:00 -0800" 5 | author: omalley 6 | version: 1.4.2 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.4.2. 11 | 12 | {% assign releaseName = "1.4.2" %} 13 | {% include release_description.md %} 14 | {% include orc_1.4.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-02-09-ORC-1.4.3.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.4.3 Released" 4 | date: "2018-02-09 08:00:00 -0800" 5 | author: omalley 6 | version: 1.4.3 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.4.3. 11 | 12 | {% assign releaseName = "1.4.3" %} 13 | {% include release_description.md %} 14 | {% include orc_1.4.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-03-27-add-xiening-and-gang.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Gang Wu and Xiening Dai added as committers" 4 | date: "2018-03-27 12:00:00 -0700" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Gang Wu and Xiening Dai as ORC committers for their 10 | work on the C++ ORC writer. 11 | 12 | Thank you for your work on ORC, Gang and Xiening! 
-------------------------------------------------------------------------------- /site/_posts/2018-05-14-ORC-1.4.4.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.4.4 Released" 4 | date: "2018-05-14 08:00:00 -0800" 5 | author: omalley 6 | version: 1.4.4 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.4.4. 11 | 12 | {% assign releaseName = "1.4.4" %} 13 | {% include release_description.md %} 14 | {% include orc_1.4.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-05-14-ORC-1.5.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.0 Released" 4 | date: "2018-05-14 12:00:00 -0800" 5 | author: omalley 6 | version: 1.5.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.0. 11 | 12 | {% assign releaseName = "1.5.0" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-05-25-ORC-1.5.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.1 Released" 4 | date: "2018-05-25 14:00:00 -0700" 5 | author: omalley 6 | version: 1.5.1 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.1. 
11 | 12 | {% assign releaseName = "1.5.1" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-06-29-ORC-1.5.2.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.2 Released" 4 | date: "2018-06-29 18:00:00 -0700" 5 | author: prasanthj 6 | version: 1.5.2 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.2. 11 | 12 | {% assign releaseName = "1.5.2" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-09-25-ORC-1.5.3.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.3 Released" 4 | date: "2018-09-25 18:00:00 -0700" 5 | author: omalley 6 | version: 1.5.3 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.3. 11 | 12 | {% assign releaseName = "1.5.3" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2018-12-21-ORC-1.5.4.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.4 Released" 4 | date: "2018-12-21 18:00:00 -0700" 5 | author: vgumashta 6 | version: 1.5.4 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.4. 
11 | 12 | {% assign releaseName = "1.5.4" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-01-10-add-dongjoon.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Dongjoon Hyun added as committer" 4 | date: "2019-01-10 12:00:00 -0700" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Dongjoon Hyun as an ORC committer for the 10 | work on improving ORC's integration to Spark. 11 | 12 | Thank you for your work on ORC, Dongjoon! -------------------------------------------------------------------------------- /site/_posts/2019-01-11-gang-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Gang Wu to PMC" 4 | date: "2019-01-11 12:00:00 -0800" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | On behalf of the Apache ORC Project Management Committee (PMC), it gives 10 | me great pleasure to announce that Gang Wu has joined the PMC. Gang 11 | has been doing great work on the C++ code base. 12 | 13 | Please join me in welcoming Gang to the ORC PMC! 14 | 15 | -------------------------------------------------------------------------------- /site/_posts/2019-03-14-ORC-1.5.5.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.5 Released" 4 | date: "2019-03-14 12:00:00 -0800" 5 | author: omalley 6 | version: 1.5.5 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.5. 
11 | 12 | {% assign releaseName = "1.5.5" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-06-10-renat-and-sandeep.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Renat Vailiullin and Sandeep More added as committers" 4 | date: "2019-06-10 12:00:00 -0700" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Renat Vailiullin and Sandeep More as 10 | ORC committers. Renat has done a lot of work to improve the Windows builds 11 | and Sandeep has been working on the data masking and statistics. 12 | 13 | Thank you for your work on ORC, Renat and Sandeep! -------------------------------------------------------------------------------- /site/_posts/2019-06-27-ORC-1.5.6.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.6 Released" 4 | date: "2019-06-27 12:00:00 -0800" 5 | author: omalley 6 | version: 1.5.6 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.6. 11 | 12 | Users are advised that as of ORC 1.5.6, ORCReaders that aren't used to 13 | create RecordReaders should be closed. 
14 | 15 | {% assign releaseName = "1.5.6" %} 16 | {% include release_description.md %} 17 | {% include orc_1.5.md %} 18 | {% include known_issues.md %} 19 | -------------------------------------------------------------------------------- /site/_posts/2019-09-03-ORC-1.6.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.0 Released" 4 | date: "2019-09-03 17:00:00 -0700" 5 | author: omalley 6 | version: 1.6.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.0. 11 | 12 | {% assign releaseName = "1.6.0" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-10-26-ORC-1.5.7.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.7 Released" 4 | date: "2019-10-26 12:00:00 -0800" 5 | author: omalley 6 | version: 1.5.7 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.7. 11 | 12 | {% assign releaseName = "1.5.7" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-10-26-ORC-1.6.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.1 Released" 4 | date: "2019-10-26 12:00:00 -0800" 5 | author: omalley 6 | version: 1.6.1 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.1. 
11 | 12 | {% assign releaseName = "1.6.1" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-11-24-ORC-1.5.8.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.8 Released" 4 | date: "2019-11-24 12:00:00 -0800" 5 | author: omalley 6 | version: 1.5.8 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.8. 11 | 12 | {% assign releaseName = "1.5.8" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-11-24-ORC-1.6.2.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.2 Released" 4 | date: "2019-11-24 12:00:00 -0800" 5 | author: omalley 6 | version: 1.6.2 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.2. 11 | 12 | {% assign releaseName = "1.6.2" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-12-09-ORC-1.4.5.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.4.5 Released" 4 | date: "2019-12-09 08:00:00 -0800" 5 | author: omalley 6 | version: 1.4.5 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.4.5. 
11 | 12 | {% assign releaseName = "1.4.5" %} 13 | {% include release_description.md %} 14 | {% include orc_1.4.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2019-12-09-dongjoon-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Dongjoon Hyun to PMC" 4 | date: "2019-12-09 12:00:00 -0800" 5 | author: omalley 6 | categories: [team] 7 | --- 8 | 9 | On behalf of the Apache ORC Project Management Committee (PMC), it gives 10 | me great pleasure to announce that Dongjoon Hyun has joined the PMC. Dongjoon 11 | has radically improved the integration between Spark and ORC. 12 | 13 | Please join me in welcoming Dongjoon to the ORC PMC! 14 | 15 | -------------------------------------------------------------------------------- /site/_posts/2020-01-30-ORC-1.5.9.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.9 Released" 4 | date: "2020-01-30 16:00:00 -0800" 5 | author: omalley 6 | version: 1.5.9 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.9. 11 | 12 | {% assign releaseName = "1.5.9" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2020-04-26-ORC-1.5.10.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.10 Released" 4 | date: "2020-04-26 16:00:00 -0800" 5 | author: omalley 6 | version: 1.5.10 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.10. 
11 | 12 | {% assign releaseName = "1.5.10" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2020-04-26-ORC-1.6.3.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.3 Released" 4 | date: "2020-04-26 12:00:00 -0800" 5 | author: omalley 6 | version: 1.6.3 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.3. 11 | 12 | {% assign releaseName = "1.6.3" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2020-09-14-ORC-1.5.11.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.11 Released" 4 | date: "2020-09-14 11:00:00 -0800" 5 | author: omalley 6 | version: 1.5.11 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.11. 11 | 12 | {% assign releaseName = "1.5.11" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2020-09-14-ORC-1.6.4.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.4 Released" 4 | date: "2020-09-14 12:00:00 -0800" 5 | author: omalley 6 | version: 1.6.4 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.4. 
11 | 12 | {% assign releaseName = "1.6.4" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2020-09-30-ORC-1.5.12.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.12 Released" 4 | date: "2020-09-30 12:00:00 -0800" 5 | author: omalley 6 | version: 1.5.12 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.12. 11 | 12 | {% assign releaseName = "1.5.12" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2020-10-01-ORC-1.6.5.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.5 Released" 4 | date: "2020-10-01 12:00:00 -0800" 5 | author: omalley 6 | version: 1.6.5 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.5. 11 | 12 | {% assign releaseName = "1.6.5" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2020-11-16-add-panagiotis.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Panagiotis Garefalakis added as committer" 4 | date: "2020-11-16 12:00:00 -0700" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Panagiotis Garefalakis as an ORC committer for the 10 | work on improving ORC's integration to Apache Hive. 11 | 12 | Thank you for your work on ORC, Panagiotis! 
13 | -------------------------------------------------------------------------------- /site/_posts/2020-12-10-ORC-1.6.6.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.6 Released" 4 | date: "2020-12-10 15:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.6 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.6. 11 | 12 | {% assign releaseName = "1.6.6" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-01-22-ORC-1.6.7.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.7 Released" 4 | date: "2021-01-22 14:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.7 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.7. 11 | 12 | {% assign releaseName = "1.6.7" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-02-08-panagiotis-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Panagiotis Garefalakis to PMC" 4 | date: "2021-02-08 12:00:00 -0800" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | On behalf of the Apache ORC Project Management Committee (PMC), it gives 10 | me great pleasure to announce that Panagiotis Garefalakis has joined the PMC. Panagiotis 11 | has radically improved the integration between Hive and ORC. 12 | 13 | Please join me in welcoming Panagiotis to the ORC PMC! 
14 | 15 | -------------------------------------------------------------------------------- /site/_posts/2021-04-13-add-william.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "William Hyun added as committer" 4 | date: "2021-04-13 12:00:00 -0700" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add William Hyun as an ORC committer for the 10 | work on improving ORC's code quality and integration to Apache Spark and Apache Iceberg. 11 | 12 | Thank you for your work on ORC, William! 13 | -------------------------------------------------------------------------------- /site/_posts/2021-05-21-ORC-1.6.8.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.8 Released" 4 | date: "2021-05-21 14:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.8 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.8. 11 | 12 | {% assign releaseName = "1.6.8" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-07-02-ORC-1.6.9.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.9 Released" 4 | date: "2021-07-02 02:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.9 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.9. 
11 | 12 | {% assign releaseName = "1.6.9" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-08-10-ORC-1.6.10.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.10 Released" 4 | date: "2021-08-10 02:00:00 -0800" 5 | author: omalley 6 | version: 1.6.10 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.10. 11 | 12 | {% assign releaseName = "1.6.10" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-09-15-ORC-1.5.13.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.5.13 Released" 4 | date: "2021-09-15 02:00:00 -0800" 5 | author: dongjoon 6 | version: 1.5.13 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.5.13. 11 | 12 | {% assign releaseName = "1.5.13" %} 13 | {% include release_description.md %} 14 | {% include orc_1.5.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-09-15-ORC-1.6.11.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.11 Released" 4 | date: "2021-09-15 02:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.11 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.11. 
11 | 12 | {% assign releaseName = "1.6.11" %} 13 | {% include release_description.md %} 14 | {% include orc_1.6.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-09-15-ORC-1.7.0.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.7.0 Released" 4 | date: "2021-09-15 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.7.0 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.7.0. 11 | 12 | {% assign releaseName = "1.7.0" %} 13 | {% include release_description.md %} 14 | {% include orc_1.7.md %} 15 | {% include known_issues.md %} 16 | -------------------------------------------------------------------------------- /site/_posts/2021-10-02-william-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds William Hyun to PMC" 4 | date: "2021-10-02 12:00:00 -0800" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | On behalf of the Apache ORC Project Management Committee (PMC), it gives 10 | me great pleasure to announce that William Hyun has joined the PMC. 11 | William has led several areas including Java 17/Apple Silicon support, Java Tools improvement, 12 | Code quality improvement using static analysis, CI/Docker test coverage improvement, 13 | and Apache ORC 1.7 migration support at Apache Arrow/Druid/Iceberg. 14 | 15 | Please join me in welcoming William to the ORC PMC! 
16 | 17 | -------------------------------------------------------------------------------- /site/_posts/2021-11-07-ORC-1.6.12.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.12 Released" 4 | date: "2021-11-07 02:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.12 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.12. 11 | 12 | {% assign releaseName = "1.6.12" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes of ORC 1.6.12: 16 | 17 | - [ORC-1008]({{site.jira}}/ORC-1008) Overflow detection code is incorrect in IntegerColumnStatisticsImpl 18 | - [ORC-1010]({{site.jira}}/ORC-1010) Bump tzdata from tzdata-2020e-1.tar.xz to tzdata-2021b-1.tar.xz 19 | - [ORC-1024]({{site.jira}}/ORC-1024) BloomFilter hash computation is inconsistent between Java and C++ clients 20 | - [ORC-1029]({{site.jira}}/ORC-1029) Could not load 'org.apache.orc.DataMask.Provider' when using orc encryption and spark executor with multi cores! 
21 | - [ORC-1034]({{site.jira}}/ORC-1034) The search byte array algorithm is incorrectly implemented in FileDump.java 22 | - [ORC-1035]({{site.jira}}/ORC-1035) `backupDataPath` may be incorrect in recoverFile 23 | - [ORC-1036]({{site.jira}}/ORC-1036) Due to tzdata upgrade, the fixed download links in CI are often not working 24 | - [ORC-1040]({{site.jira}}/ORC-1040) Add Debian 11 docker test 25 | - [ORC-1042]({{site.jira}}/ORC-1042) Ignore unused-function C++ compile warning on CentOS 7 26 | - [ORC-1043]({{site.jira}}/ORC-1043) Fix C++ conversion compilation error in CentOS 7 27 | -------------------------------------------------------------------------------- /site/_posts/2021-11-23-add-yiqun.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Yiqun Zhang added as committer" 4 | date: "2021-11-23 12:00:00 -0700" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Yiqun Zhang as an ORC committer 10 | for the work on improving ORC tools. 11 | 12 | Thank you for your work on ORC, Yiqun! 13 | -------------------------------------------------------------------------------- /site/_posts/2022-01-20-ORC-1.6.13.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.13 Released" 4 | date: "2022-01-20 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.13 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.13. 
11 | 12 | {% assign releaseName = "1.6.13" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1065]({{site.jira}}/ORC-1065) Fix IndexOutOfBoundsException in ReaderImpl.extractFileTail 17 | - [ORC-1078]({{site.jira}}/ORC-1078) Row group end offset doesn't accommodate all the blocks 18 | 19 | The 'tests' fixes: 20 | - [ORC-875]({{site.jira}}/ORC-875) Add GitHub Action job for Windows Server 2019 21 | - [ORC-941]({{site.jira}}/ORC-941) Move MacOS 10.15/11.5 test from Travis to GitHub Action 22 | - [ORC-1079]({{site.jira}}/ORC-1079) Add Linux Clang 11 GitHub Action test coverage 23 | - [ORC-1080]({{site.jira}}/ORC-1080) Remove .travis.yml 24 | 25 | -------------------------------------------------------------------------------- /site/_posts/2022-03-05-add-quanlong.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Quanlong Huang added as committer" 4 | date: "2022-03-05 12:00:00 -0700" 5 | author: gangwu 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Quanlong Huang as an ORC committer 10 | for the work on ORC C++ library and Apache Impala integration. 11 | 12 | Thank you for your work on ORC, Quanlong! 13 | -------------------------------------------------------------------------------- /site/_posts/2022-04-14-ORC-1.6.14.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.6.14 Released" 4 | date: "2022-04-14 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.6.14 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.6.14. 
11 | 12 | {% assign releaseName = "1.6.14" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1121]({{site.jira}}/ORC-1121) Fix column conversion check bug which causes column filters don't work 17 | - [ORC-1146]({{site.jira}}/ORC-1146) Float category missing check if the statistic sum is a finite value 18 | - [ORC-1147]({{site.jira}}/ORC-1147) Use isNaN instead of isFinite to determine the contain NaN values 19 | 20 | The 'tests' fixes: 21 | - [ORC-1016]({{site.jira}}/ORC-1016) Use `openssl@1.1` in GitHub Action MacOS CIs 22 | - [ORC-1113]({{site.jira}}/ORC-1113) Remove CentOS 8 from docker-based tests 23 | 24 | -------------------------------------------------------------------------------- /site/_posts/2022-04-15-ORC-1.7.4.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.7.4 Released" 4 | date: "2022-04-15 03:00:00 -0800" 5 | author: william 6 | version: 1.7.4 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.7.4. 
11 | 12 | {% assign releaseName = "1.7.4" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1120]({{site.jira}}/ORC-1120) Remove C++ library limitation about write version 17 | - [ORC-1121]({{site.jira}}/ORC-1121) Fix column conversion check bug which causes column filters don't work 18 | - [ORC-1127]({{site.jira}}/ORC-1127) Add missing version of UNSTABLE-PRE-2.0 19 | - [ORC-1146]({{site.jira}}/ORC-1146) Float category missing check if the statistic sum is a finite value 20 | - [ORC-1147]({{site.jira}}/ORC-1147) Use isNaN instead of isFinite to determine the contain NaN values 21 | 22 | The improvements: 23 | - [ORC-236]({{site.jira}}/ORC-236) Support `UNION` type in Java Convert tool 24 | - [ORC-1116]({{site.jira}}/ORC-1116) Fix csv-import tool when exporting long bytes 25 | - [ORC-1123]({{site.jira}}/ORC-1123) Add estimationMemory method for writer 26 | 27 | The test changes: 28 | - [ORC-1145]({{site.jira}}/ORC-1145) Add Java 18 to GitHub Action CI 29 | - [ORC-1118]({{site.jira}}/ORC-1118) Support Java 17 and ARM64 docker tests 30 | 31 | The documentation changes: 32 | - [ORC-1117]({{site.jira}}/ORC-1117) Add `Dask` page at `Using in Python` section 33 | - [ORC-1119]({{site.jira}}/ORC-1119) Remove timestamp from ORC API docs 34 | -------------------------------------------------------------------------------- /site/_posts/2022-05-08-yiqun-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Yiqun Zhang to PMC" 4 | date: "2022-05-08 3:00:00 -0800" 5 | author: william 6 | categories: [team] 7 | --- 8 | 9 | The Apache ORC Project Management Committee (PMC) is happy to announce 10 | that Yiqun Zhang has joined us as a new member of the PMC. 11 | Yiqun has been showing consistent contributions as a committer, 12 | and participated in both major and maintenance releases by actively 13 | helping the release managers with testing the release candidates. 
14 | 15 | Please welcome Yiqun to the ORC PMC! 16 | -------------------------------------------------------------------------------- /site/_posts/2022-06-05-add-pavan.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Pavan Lanka to PMC" 4 | title: "Pavan Lanka added as committer" 5 | date: "2022-06-05 3:00:00 -0800" 6 | author: dongjoon 7 | categories: [team] 8 | --- 9 | 10 | The ORC PMC is happy to add Pavan Lanka as an ORC committer for the 11 | work on introducing LazyIO of non-filter columns and optimizing stripe index and data reads. 12 | 13 | Thank you for your work on ORC, Pavan! 14 | -------------------------------------------------------------------------------- /site/_posts/2022-09-12-william-chair.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "William Hyun elected as Chair" 4 | date: "2022-09-21 12:00:00 -0800" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | The Apache ORC Project Management Committee (PMC) elected William Hyun as the Chair on September 12th 10 | and Apache Software Foundation (ASF) Board approved it and appointed him as Vice President for Apache ORC on September 21st. 11 | 12 | William has been leading many areas. 13 | He helped Apache ORC PMC add a new member, 14 | served as a release manager for 1.7.4/1.7.5/1.7.6/1.8.0, 15 | made an important contribution on inter-ASF project collaboration and ORC integration across several projects to help all ORC users, 16 | improved ORC infra like ASF ORC DockerHub Setup, docker tests, and GitHub Action, 17 | and revamped user experiences through updating websites and Homebrew. 
18 | -------------------------------------------------------------------------------- /site/_posts/2022-11-17-ORC-1.7.7.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.7.7 Released" 4 | date: "2022-11-17 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.7.7 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.7.7. 11 | 12 | {% assign releaseName = "1.7.7" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1283]({{site.jira}}/ORC-1283) ENABLE_INDEXES does not take effect 17 | 18 | The test changes: 19 | - [ORC-1254]({{site.jira}}/ORC-1254) Add spotbugs check 20 | - [ORC-1299]({{site.jira}}/ORC-1299) Fix fetch data error in bench module 21 | 22 | The tasks: 23 | - [ORC-1256]({{site.jira}}/ORC-1256) Publish tests jar to maven central 24 | - [ORC-1268]({{site.jira}}/ORC-1268) Set CMP0135 policy for CMake 3.24+ 25 | -------------------------------------------------------------------------------- /site/_posts/2023-01-21-ORC-1.7.8.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.7.8 Released" 4 | date: "2023-01-21 03:00:00 -0800" 5 | author: william 6 | version: 1.7.8 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.7.8. 
11 | 12 | {% assign releaseName = "1.7.8" %} 13 | {% include release_description.md %} 14 | 15 | The improvements: 16 | - [ORC-1342]({{site.jira}}/ORC-1342) Publish SBOM artifacts 17 | - [ORC-1344]({{site.jira}}/ORC-1344) Skip SBOM generation during CMake 18 | - [ORC-1345]({{site.jira}}/ORC-1345) Use `makeBom` and skip snapshot check in GitHub Action `publish_snapshot` job 19 | 20 | The bug fixes: 21 | - [ORC-1332]({{site.jira}}/ORC-1332) Avoid `NegativeArraySizeException` when using `searchArgument` 22 | - [ORC-1343]({{site.jira}}/ORC-1343) Ignore `orc.create.index` 23 | 24 | The test changes: 25 | - [ORC-1323]({{site.jira}}/ORC-1323) Make `docker/reinit.sh` support target OS arguments -------------------------------------------------------------------------------- /site/_posts/2023-02-13-add-xin.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Xin Zhang added as committer" 4 | date: "2023-02-13 12:00:00 -0700" 5 | author: gangwu 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Xin Zhang as an ORC committer 10 | for the work on ORC C++ library. 11 | 12 | Thank you for your work on ORC, Xin! 13 | -------------------------------------------------------------------------------- /site/_posts/2023-03-15-ORC-1.8.3.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.8.3 Released" 4 | date: "2023-03-15 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.8.3 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.8.3. 
11 | 12 | {% assign releaseName = "1.8.3" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1357]({{site.jira}}/ORC-1357): Handle missing compression block size 17 | - [ORC-1382]({{site.jira}}/ORC-1382): Fix secondary config names `org.sarg.*` to `orc.sarg.*` 18 | - [ORC-1384]({{site.jira}}/ORC-1384): Fix `ArrayIndexOutOfBoundsException` when reading dictionary stream bigger than dictionary 19 | - [ORC-1393]({{site.jira}}/ORC-1393): Add `reset(DiskRangeList input, long length)` to `InStream` impl class 20 | 21 | The test changes: 22 | - [ORC-1360]({{site.jira}}/ORC-1360) Pin `mockito` to 4.x 23 | - [ORC-1364]({{site.jira}}/ORC-1364) Pin `spotless` to 2.30.0 24 | - [ORC-1374]({{site.jira}}/ORC-1374) Update Spark to 3.3.2 25 | 26 | The tasks: 27 | - [ORC-1358]({{site.jira}}/ORC-1358) Use spotless to format pom files 28 | -------------------------------------------------------------------------------- /site/_posts/2023-05-07-ORC-1.7.9.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.7.9 Released" 4 | date: "2023-05-07 00:00:00 0000" 5 | author: gangwu 6 | version: 1.7.9 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.7.9. 
11 | 12 | {% assign releaseName = "1.7.9" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1382]({{site.jira}}/ORC-1382) Fix secondary config names `org.sarg.*` to `orc.sarg.*` 17 | - [ORC-1395]({{site.jira}}/ORC-1395) Enforce license check 18 | - [ORC-1407]({{site.jira}}/ORC-1407) Upgrade cyclonedx-maven-plugin to 2.7.6 19 | 20 | The test changes: 21 | - [ORC-1374]({{site.jira}}/ORC-1374) Update Spark to 3.3.2 22 | -------------------------------------------------------------------------------- /site/_posts/2023-06-14-ORC-1.8.4.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.8.4 Released" 4 | date: "2023-06-14 17:49:00 +0800" 5 | author: yqzhang 6 | version: 1.8.4 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.8.4. 11 | 12 | {% assign releaseName = "1.8.4" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1304]({{site.jira}}/ORC-1304): [C++] Fix seeking over empty PRESENT stream 17 | - [ORC-1400]({{site.jira}}/ORC-1400): Use Hadoop 3.3.5 on Java 17+ and benchmark 18 | - [ORC-1413]({{site.jira}}/ORC-1413): Fix for ORC row level filter issue with ACID table 19 | 20 | 21 | The test changes: 22 | - [ORC-1404]({{site.jira}}/ORC-1404) Bump `parquet` to 1.13.0 23 | - [ORC-1414]({{site.jira}}/ORC-1414) Upgrade java bench module to spark3.4 24 | - [ORC-1416]({{site.jira}}/ORC-1416) Upgrade Jackson dependency to 2.14.2 in bench module 25 | - [ORC-1420]({{site.jira}}/ORC-1420) Pin `net.bytebuddy` package to 1.12.x 26 | 27 | 28 | The tasks: 29 | - [ORC-1395]({{site.jira}}/ORC-1395) Enforce license check via github action 30 | -------------------------------------------------------------------------------- /site/_posts/2023-08-16-ORC-1.9.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.9.1 
Released" 4 | date: "2023-08-16 01:00:00 -0800" 5 | author: dongjoon 6 | version: 1.9.1 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.9.1. 11 | 12 | {% assign releaseName = "1.9.1" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1455]({{site.jira}}/ORC-1455) Fix build failure on non-x86 with unused macro in CpuInfoUtil.cc 17 | - [ORC-1457]({{site.jira}}/ORC-1457) Fix ambiguous overload of Type::createRowBatch 18 | - [ORC-1462]({{site.jira}}/ORC-1462) Bump aircompressor to 0.25 to fix JDK-8081450 19 | 20 | The test changes: 21 | - [ORC-1432]({{site.jira}}/ORC-1432) Add MacOS 13 GitHub Action Job 22 | - [ORC-1464]({{site.jira}}/ORC-1464) Bump avro to 1.11.2 23 | - [ORC-1465]({{site.jira}}/ORC-1465) Bump snappy-java to 1.1.10.3 24 | -------------------------------------------------------------------------------- /site/_posts/2023-09-05-ORC-1.8.5.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.8.5 Released" 4 | date: "2023-09-05 22:57:00 +0800" 5 | author: gangwu 6 | version: 1.8.5 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.8.5. 
11 | 12 | {% assign releaseName = "1.8.5" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1315]({{site.jira}}/ORC-1315): [C++] Byte to integer conversions fail on platforms with unsigned char type 17 | - [ORC-1482]({{site.jira}}/ORC-1482): RecordReaderImpl.evaluatePredicateProto assumes floating point stats are always present 18 | 19 | The tasks: 20 | - [ORC-1489]({{site.jira}}/ORC-1489) Assign a writer id to CUDF 21 | -------------------------------------------------------------------------------- /site/_posts/2023-11-10-ORC-1.7.10.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.7.10 Released" 4 | date: "2023-11-10 00:00:00 0000" 5 | author: dongjoon 6 | version: 1.7.10 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.7.10. 11 | 12 | {% assign releaseName = "1.7.10" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1304]({{site.jira}}/ORC-1304) [C++] Fix seeking over empty PRESENT stream 17 | - [ORC-1413]({{site.jira}}/ORC-1413) Fix for ORC row level filter issue with ACID table 18 | 19 | The task changes: 20 | - [ORC-1482]({{site.jira}}/ORC-1482) Adaptation to read ORC files created by CUDF 21 | - [ORC-1489]({{site.jira}}/ORC-1489) Assign a writer id to CUDF 22 | -------------------------------------------------------------------------------- /site/_posts/2023-11-10-ORC-1.8.6.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.8.6 Released" 4 | date: "2023-11-10 00:00:00 0000" 5 | author: dongjoon 6 | version: 1.8.6 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.8.6. 
11 | 12 | {% assign releaseName = "1.8.6" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1525]({{site.jira}}/ORC-1525) Fix bad read in RleDecoderV2::readByte 17 | 18 | The test changes: 19 | - [ORC-1432]({{site.jira}}/ORC-1432) Add MacOS 13 GitHub Action Job 20 | 21 | Documentations: 22 | - [ORC-1499]({{site.jira}}/ORC-1499) Add MacOS 13 and 14 to building.md 23 | -------------------------------------------------------------------------------- /site/_posts/2024-01-13-add-deshan.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Deshan Xiao added as committer" 4 | date: "2024-01-13 12:00:00 -0700" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Deshan Xiao as an ORC committer 10 | for the work on ORC Java Brotli codec and vcpkg C++ library. 11 | 12 | Thank you for your work on ORC, Deshan! 13 | -------------------------------------------------------------------------------- /site/_posts/2024-04-14-ORC-1.8.7.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.8.7 Released" 4 | date: "2024-04-14 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.8.7 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.8.7. 
11 | 12 | {% assign releaseName = "1.8.7" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1528]({{site.jira}}/ORC-1528): Fix readBytes potential overflow in RecordReaderUtils.ChunkReader#create 17 | - [ORC-1602]({{site.jira}}/ORC-1602): [C++] limit compression block size 18 | 19 | The test changes: 20 | - [ORC-1556]({{site.jira}}/ORC-1556) Add Rocky Linux 9 Docker Test 21 | - [ORC-1557]({{site.jira}}/ORC-1557) Add GitHub Action CI for Docker Test 22 | - [ORC-1560]({{site.jira}}/ORC-1560) Remove Java11 and clang variants from docker/os-list.txt in branch-1.8 23 | - [ORC-1562]({{site.jira}}/ORC-1562) Bump guava to 33.0.0-jre 24 | - [ORC-1578]({{site.jira}}/ORC-1578) Fix SparkBenchmark on sales data according to SPARK-40918 25 | - [ORC-1621]({{site.jira}}/ORC-1621) Switch to oraclelinux9 from rocky9 26 | 27 | The documentations: 28 | - [ORC-1536]({{site.jira}}/ORC-1536) Remove hive-storage-api link from maven-javadoc-plugin 29 | - [ORC-1563]({{site.jira}}/ORC-1563) Fix orc.bloom.filter.fpp default value and orc.compress notes of Spark and Hive config docs 30 | -------------------------------------------------------------------------------- /site/_posts/2024-05-13-add-shaoyun-and-yuanping.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "Shaoyun Chen and Yuanping Wu added as committers" 4 | date: "2024-05-13 12:00:00 -0700" 5 | author: gangwu 6 | categories: [team] 7 | --- 8 | 9 | The ORC PMC is happy to add Shaoyun Chen and Yuanping Wu as 10 | committers for their work on ORC Java and C++ library. 11 | 12 | Thank you for your work on ORC, Shaoyun and Yuanping! 
13 | -------------------------------------------------------------------------------- /site/_posts/2024-07-16-ORC-1.9.4.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.9.4 Released" 4 | date: "2024-07-16 03:00:00 -0800" 5 | author: william 6 | version: 1.9.4 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.9.4. 11 | 12 | {% assign releaseName = "1.9.4" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1696]({{site.jira}}/ORC-1696) Fix ClassCastException when reading avro decimal type in benchmark 17 | - [ORC-1721]({{site.jira}}/ORC-1721) Upgrade `aircompressor` to 0.27 18 | - [ORC-1738]({{site.jira}}/ORC-1738) Wrong Int128 maximum value 19 | 20 | The test changes: 21 | - [ORC-1619]({{site.jira}}/ORC-1619) Add `MacOS 14` to GitHub Action 22 | - [ORC-1699]({{site.jira}}/ORC-1699) Fix SparkBenchmark in Parquet format according to SPARK-40918 23 | 24 | The task changes: 25 | - [ORC-1540]({{site.jira}}/ORC-1540) Remove MacOS 11 from GitHub Action CI 26 | -------------------------------------------------------------------------------- /site/_posts/2024-09-13-ORC-1.7.11.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.7.11 Released" 4 | date: "2024-09-13 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.7.11 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.7.11. 
11 | 12 | {% assign releaseName = "1.7.11" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1602]({{site.jira}}/ORC-1602) [C++] limit compression block size 17 | - [ORC-1738]({{site.jira}}/ORC-1738) [C++] Fix wrong Int128 maximum value 18 | 19 | The 'tests' fixes: 20 | - [ORC-1540]({{site.jira}}/ORC-1540) Remove MacOS 11 from GitHub Action CI and docs 21 | - [ORC-1556]({{site.jira}}/ORC-1556) Add `Rocky Linux 9` Docker Test 22 | - [ORC-1557]({{site.jira}}/ORC-1557) Add GitHub Action CI for `Docker Test` 23 | - [ORC-1561]({{site.jira}}/ORC-1561) Remove Java11 and clang variants from `docker/os-list.txt` in `branch-1.7` 24 | - [ORC-1578]({{site.jira}}/ORC-1578) Fix `SparkBenchmark` on `sales` data according to SPARK-40918 25 | - [ORC-1696]({{site.jira}}/ORC-1696) Fix ClassCastException when reading avro decimal type in benchmark 26 | -------------------------------------------------------------------------------- /site/_posts/2024-11-11-ORC-1.8.8.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.8.8 Released" 4 | date: "2024-11-11 22:00:00 +0800" 5 | author: wgtmac 6 | version: 1.8.8 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.8.8. 
11 | 12 | {% assign releaseName = "1.8.8" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1696]({{site.jira}}/ORC-1696): Fix ClassCastException when reading avro decimal type in benchmark 17 | - [ORC-1738]({{site.jira}}/ORC-1738): [C++] Wrong Int128 maximum value 18 | 19 | The test changes: 20 | - [ORC-1793]({{site.jira}}/ORC-1793) Upgrade Spark to 3.4.4 21 | 22 | The tasks: 23 | - [ORC-1540]({{site.jira}}/ORC-1540) Remove MacOS 11 from GitHub Action CI 24 | -------------------------------------------------------------------------------- /site/_posts/2024-11-14-ORC-1.9.5.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.9.5 Released" 4 | date: "2024-11-14 03:00:00 -0800" 5 | author: dongjoon 6 | version: 1.9.5 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.9.5. 11 | 12 | {% assign releaseName = "1.9.5" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1741]({{site.jira}}/ORC-1741) Respect decimal reader isRepeating flag 17 | 18 | The test changes: 19 | - [ORC-1792]({{site.jira}}/ORC-1792) Upgrade Spark to 3.5.3 20 | -------------------------------------------------------------------------------- /site/_posts/2025-03-23-shaoyun-pmc.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC adds Shaoyun Chen to PMC" 4 | date: "2025-03-23 12:00:00 -0800" 5 | author: dongjoon 6 | categories: [team] 7 | --- 8 | 9 | The Apache ORC Project Management Committee (PMC) is happy to announce that Shaoyun Chen has joined us as a new member of the PMC. Shaoyun has been showing consistent contributions as a committer, and participated in both major and maintenance releases by actively helping the release managers with testing the release candidates. 10 | 11 | Please join me in welcoming Shaoyun to the ORC PMC! 
12 | 13 | -------------------------------------------------------------------------------- /site/_posts/2025-05-06-ORC-1.8.9.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.8.9 Released" 4 | date: "2025-05-06 22:00:00 +0800" 5 | author: wgtmac 6 | version: 1.8.9 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.8.9. 11 | 12 | {% assign releaseName = "1.8.9" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1866]({{site.jira}}/ORC-1866) Avoid zlib decompression infinite loop 17 | - [ORC-1879]({{site.jira}}/ORC-1879) Fix Heap Buffer Overflow in LZO Decompression 18 | 19 | The test changes: 20 | - [ORC-1745]({{site.jira}}/ORC-1745) Remove `Ubuntu 20.04` Support 21 | - [ORC-1776]({{site.jira}}/ORC-1776) Remove `MacOS 12` from GitHub Action CI and docs 22 | - [ORC-1870]({{site.jira}}/ORC-1870) Remove Java 18 test pipeline from `branch-1.8` 23 | 24 | The tasks: 25 | - [ORC-1411]({{site.jira}}/ORC-1411) Remove Ubuntu18.04 from docker-based tests 26 | - [ORC-1709]({{site.jira}}/ORC-1709) Upgrade GitHub Action `setup-java` to v4 and use built-in cache feature 27 | -------------------------------------------------------------------------------- /site/_posts/2025-05-06-ORC-1.9.6.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 1.9.6 Released" 4 | date: "2025-05-06 22:00:00 +0800" 5 | author: wgtmac 6 | version: 1.9.6 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v1.9.6. 
11 | 12 | {% assign releaseName = "1.9.6" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1866]({{site.jira}}/ORC-1866) Avoid zlib decompression infinite loop 17 | - [ORC-1879]({{site.jira}}/ORC-1879) Fix Heap Buffer Overflow in LZO Decompression 18 | - [ORC-1885]({{site.jira}}/ORC-1885) Update all `ubuntu-20.04` to `ubuntu-22.04` in CI 19 | 20 | The test changes: 21 | - [ORC-1745]({{site.jira}}/ORC-1745) Remove `Ubuntu 20.04` Support 22 | - [ORC-1776]({{site.jira}}/ORC-1776) Remove `MacOS 12` from GitHub Action CI and docs 23 | - [ORC-1818]({{site.jira}}/ORC-1818) Upgrade Spark to 3.5.4 in bench module 24 | - [ORC-1869]({{site.jira}}/ORC-1869) Upgrade Spark to 3.5.5 in bench module for Apache ORC 1.9.x 25 | 26 | The tasks: 27 | - [ORC-1709]({{site.jira}}/ORC-1709) Upgrade GitHub Action `setup-java` to v4 and use built-in cache feature 28 | -------------------------------------------------------------------------------- /site/_posts/2025-05-06-ORC-2.0.5.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 2.0.5 Released" 4 | date: "2025-05-06 00:00:00 0000" 5 | author: dongjoon 6 | version: 2.0.5 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v2.0.5. 
11 | 12 | {% assign releaseName = "2.0.5" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1866]({{site.jira}}/ORC-1866) Avoid zlib decompression infinite loop 17 | - [ORC-1879]({{site.jira}}/ORC-1879) [C++] Fix Heap Buffer Overflow in LZO Decompression 18 | - [ORC-1881]({{site.jira}}/ORC-1881) [C++] Populate dstBatch's scale and precision in DecimalConvertColumnReader 19 | 20 | The test changes: 21 | - [ORC-1745]({{site.jira}}/ORC-1745) Remove `Ubuntu 20.04` Support 22 | - [ORC-1822]({{site.jira}}/ORC-1822) [C++][CI] Use cpp-linter-action for clang-tidy and clang-format 23 | - [ORC-1835]({{site.jira}}/ORC-1835) [C++] Fix cpp-linter-action to build first 24 | - [ORC-1871]({{site.jira}}/ORC-1871) [C++] Include iomanip at TestDictionaryEncoding and TestConvertColumnReader 25 | -------------------------------------------------------------------------------- /site/_posts/2025-05-06-ORC-2.1.2.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news_item 3 | title: "ORC 2.1.2 Released" 4 | date: "2025-05-06 00:00:00 0000" 5 | author: dongjoon 6 | version: 2.1.2 7 | categories: [release] 8 | --- 9 | 10 | The ORC team is excited to announce the release of ORC v2.1.2. 
11 | 12 | {% assign releaseName = "2.1.2" %} 13 | {% include release_description.md %} 14 | 15 | The bug fixes: 16 | - [ORC-1866]({{site.jira}}/ORC-1866) Avoid zlib decompression infinite loop 17 | - [ORC-1879]({{site.jira}}/ORC-1879) [C++] Fix Heap Buffer Overflow in LZO Decompression 18 | - [ORC-1881]({{site.jira}}/ORC-1881) [C++] Populate dstBatch's scale and precision in DecimalConvertColumnReader 19 | 20 | The test changes: 21 | - [ORC-1871]({{site.jira}}/ORC-1871) [C++] Include iomanip at TestDictionaryEncoding and TestConvertColumnReader 22 | - [ORC-1872]({{site.jira}}/ORC-1872) Upgrade `extra-enforcer-rules` to 1.10.0 23 | - [ORC-1875]({{site.jira}}/ORC-1875) Support `ubuntu-24.04-arm` in GitHub Action CIs 24 | 25 | The build and dependency changes: 26 | - [ORC-1876]({{site.jira}}/ORC-1876) Upgrade to ORC Format 1.1 27 | -------------------------------------------------------------------------------- /site/_sass/_font-awesome.scss: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Font Awesome 4.2.0 by @davegandy - http://fontawesome.io - @fontawesome 3 | * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) 4 | */ 5 | @font-face { 6 | font-family: 'FontAwesome'; 7 | src: url('../fonts/fontawesome-webfont.eot?v=4.2.0'); 8 | src: url('../fonts/fontawesome-webfont.eot?#iefix&v=4.2.0') format('embedded-opentype'), url('../fonts/fontawesome-webfont.woff?v=4.2.0') format('woff'), url('../fonts/fontawesome-webfont.ttf?v=4.2.0') format('truetype'), url('../fonts/fontawesome-webfont.svg?v=4.2.0#fontawesomeregular') format('svg'); 9 | font-weight: normal; 10 | font-style: normal; 11 | } 12 | .fa { 13 | display: inline-block; 14 | font: normal normal normal 14px/1 FontAwesome; 15 | font-size: inherit; 16 | text-rendering: auto; 17 | -webkit-font-smoothing: antialiased; 18 | -moz-osx-font-smoothing: grayscale; 19 | } 20 | .fa-link:before { 21 | content: "\f0c1"; 22 | } 23 | .fa-pencil:before { 24 | content: "\f040"; 25 | } 26 | -------------------------------------------------------------------------------- /site/_sass/_mixins.scss: -------------------------------------------------------------------------------- 1 | @mixin box-shadow($shadow...) { 2 | -webkit-box-shadow: $shadow; 3 | -moz-box-shadow: $shadow; 4 | box-shadow: $shadow; 5 | } 6 | 7 | @mixin border-radius($radius...) { 8 | -webkit-border-radius: $radius; 9 | -moz-border-radius: $radius; 10 | border-radius: $radius; 11 | } 12 | 13 | @mixin border-top-left-radius($radius...) { 14 | -webkit-border-top-left-radius: $radius; 15 | -moz-border-radius-topleft: $radius; 16 | border-top-left-radius: $radius; 17 | } 18 | 19 | @mixin border-top-right-radius($radius...) { 20 | -webkit-border-top-right-radius: $radius; 21 | -moz-border-radius-topright: $radius; 22 | border-top-right-radius: $radius; 23 | } 24 | 25 | @mixin transition($transition...) 
{ 26 | -webkit-transition: $transition; 27 | -moz-transition: $transition; 28 | -o-transition: $transition; 29 | transition: $transition; 30 | } 31 | 32 | @mixin user-select($select...) { 33 | -webkit-user-select: $select; /* Chrome all / Safari all */ 34 | -moz-user-select: $select; /* Firefox all */ 35 | -ms-user-select: $select; /* IE 10+ */ 36 | -o-user-select: $select; 37 | user-select: $select; 38 | } 39 | -------------------------------------------------------------------------------- /site/develop/design/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Design 4 | --- 5 | 6 | * [Lazy Filters](lazy_filter) 7 | * [IO](io) -------------------------------------------------------------------------------- /site/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/favicon.ico -------------------------------------------------------------------------------- /site/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /site/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /site/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/fonts/fontawesome-webfont.woff 
-------------------------------------------------------------------------------- /site/help/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Getting Help 4 | --- 5 | 6 | Need help with ORC? Try these resources. 7 | 8 | ## Mailing Lists 9 | 10 | The best option is to send email to the user list 11 | [user@orc.apache.org](mailto:user@orc.apache.org), which is for user 12 | discussions. All of the historic traffic is available in the 13 | [archive](https://mail-archives.apache.org/mod_mbox/orc-user/). To 14 | subscribe to the user list, please send email to 15 | [user-subscribe@orc.apache.org](mailto:user-subscribe@orc.apache.org). 16 | 17 | ## Bug Reports 18 | 19 | Please file any issues you encounter or fixes you'd like on the 20 | [ORC Jira](https://issues.apache.org/jira/browse/orc). We welcome 21 | patches! 22 | 23 | ## StackOverflow 24 | 25 | [StackOverflow](https://stackoverflow.com) is a wonderful resource for 26 | any developer. Take a look over there to see if someone has answered 27 | your question. 28 | 29 | ## Browse the code 30 | 31 | One of the advantages of open source software is that you can browse the code. 32 | The code is available on [github](https://github.com/apache/orc/tree/main). 
33 | -------------------------------------------------------------------------------- /site/img/BloomFilter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/BloomFilter.png -------------------------------------------------------------------------------- /site/img/CompressionStream.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/CompressionStream.png -------------------------------------------------------------------------------- /site/img/Direct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/Direct.png -------------------------------------------------------------------------------- /site/img/OrcFileLayout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/OrcFileLayout.png -------------------------------------------------------------------------------- /site/img/TreeWriters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/TreeWriters.png -------------------------------------------------------------------------------- /site/img/asf_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/asf_logo.png -------------------------------------------------------------------------------- /site/img/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/logo.png -------------------------------------------------------------------------------- /site/img/seekvsread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/orc/9c6fef5bf50da0ea47b6cae1bf679032483dfec7/site/img/seekvsread.png -------------------------------------------------------------------------------- /site/news/index.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news 3 | title: News 4 | permalink: /news/ 5 | author: all 6 | --- 7 | 8 | {% for post in site.posts %} 9 | {% include news_item.html %} 10 | {% endfor %} 11 | -------------------------------------------------------------------------------- /site/news/releases/index.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: news 3 | title: Releases 4 | permalink: /news/releases/ 5 | author: all 6 | --- 7 | 8 | {% for post in site.categories.release %} 9 | {% include news_item.html %} 10 | {% endfor %} 11 | -------------------------------------------------------------------------------- /site/releases/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Releases 4 | --- 5 | {% for relItr in site.data.releases %} 6 | {% if relItr[1]["state"] == "latest" %} 7 | {% assign releaseName = relItr[0] %} 8 | {% break %} 9 | {% endif %} 10 | {% endfor %} 11 | 12 | ## Current Release - {{ releaseName }}: 13 | 14 | ORC {{ releaseName }} contains both the Java and C++ reader and writer 15 | for ORC files. It also contains tools for working with ORC 16 | files and looking at their contents and metadata. 
17 | 18 | {% include release_description.md %} 19 | {% include orc_2.1.md %} 20 | {% include known_issues.md %} 21 | 22 | ## Checking signatures 23 | 24 | Verify the releases by following [ASF procedures](https://www.apache.org/info/verification.html). 25 | All GPG signatures should be verified as matching one of the keys in ORC's 26 | committers' [key list]({{ site.dist }}/KEYS). 27 | 28 | ~~~ shell 29 | % shasum -a 256 orc-X.Y.Z.tgz | diff - orc-X.Y.Z.tgz.sha256 30 | % gpg --import KEYS 31 | % gpg --verify orc-X.Y.Z.tgz.asc 32 | ~~~ 33 | 34 | ## All releases: 35 | 36 | {% include release_table.html %} 37 | -------------------------------------------------------------------------------- /site/releases/releases.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: Releases 4 | permalink: /releases.html 5 | --- 6 | {% for relItr in site.data.releases %} 7 | {% if relItr[1]["state"] == "latest" %} 8 | {% assign releaseName = relItr[0] %} 9 | {% break %} 10 | {% endif %} 11 | {% endfor %} 12 | 13 | ## Current Release - {{ releaseName }}: 14 | 15 | ORC {{ releaseName }} contains both the Java and C++ reader and writer 16 | for ORC files. It also contains tools for working with ORC 17 | files and looking at their contents and metadata. 18 | 19 | {% include release_description.md %} 20 | {% include orc_2.1.md %} 21 | {% include known_issues.md %} 22 | 23 | ## Checking signatures 24 | 25 | Verify the releases by following [ASF procedures](https://www.apache.org/info/verification.html). 26 | All GPG signatures should be verified as matching one of the keys in ORC's 27 | committers' [key list]({{ site.dist }}/KEYS). 
28 | 29 | ~~~ shell 30 | % shasum -a 256 orc-X.Y.Z.tgz | diff - orc-X.Y.Z.tgz.sha256 31 | % gpg --import KEYS 32 | % gpg --verify orc-X.Y.Z.tgz.asc 33 | ~~~ 34 | 35 | ## All releases: 36 | 37 | {% include release_table.html %} 38 | -------------------------------------------------------------------------------- /site/security/CVE-2018-8015.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: CVE-2018-8015 4 | --- 5 | 6 | # Apache ORC denial of service vulnerability 7 | 8 | ## Date: 9 | 2018-05-17 10 | 11 | ## Severity: 12 | 13 | Medium 14 | 15 | ## Vendor: 16 | 17 | [The Apache Software Foundation](https://apache.org) 18 | 19 | ## Versions Affected: 20 | 21 | * ORC 1.0.0 to 1.4.3 22 | 23 | ## Description: 24 | 25 | A malformed ORC file can trigger an endlessly recursive function call in the 26 | C++ or Java parser. 27 | 28 | The impact of this bug is most likely denial-of-service against software that 29 | uses the ORC file parser. With the C++ parser, the stack overflow might 30 | possibly corrupt the stack. 31 | 32 | ## Mitigation: 33 | 34 | * 1.3.x and 1.4.x users should upgrade to 1.4.4. 35 | * 1.0.x to 1.2.x users should apply ORC-360 (Java) and ORC-313 (C++). 36 | 37 | ## Example: 38 | 39 | An ORC file with a struct, union, array, or map type that includes itself as 40 | a child will cause the parser to infinitely recurse until the stack overflows. 41 | 42 | ## Credit: 43 | This issue was discovered by Terry Chia. 
44 | 45 | ## References: 46 | [Apache ORC security](/security) -------------------------------------------------------------------------------- /site/security/CVE-2025-47436.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: CVE-2025-47436 4 | --- 5 | 6 | # Potential Heap Buffer Overflow during C++ LZO Decompression 7 | 8 | ## Date: 9 | 2025-05-13 10 | 11 | ## Severity: 12 | 13 | Medium 14 | 15 | ## Vendor: 16 | 17 | [The Apache Software Foundation](https://apache.org) 18 | 19 | ## Versions Affected: 20 | 21 | - Apache ORC through 1.8.8 22 | - Apache ORC 1.9.0 through 1.9.5 23 | - Apache ORC 2.0.0 through 2.0.4 24 | - Apache ORC 2.1.0 through 2.1.1 25 | 26 | ## Description: 27 | 28 | A vulnerability has been identified in the ORC C++ LZO decompression logic, 29 | where specially crafted malformed ORC files can cause the decompressor 30 | to allocate a 250-byte buffer but then attempt to copy 295 bytes into it. 31 | It causes memory corruption due to insufficient input buffer boundary validation during decompression. 32 | 33 | This issue is being tracked as ORC-1879. 34 | 35 | ## Mitigation: 36 | 37 | * Upgrade to 1.8.9, 1.9.6, 2.0.5, and 2.1.2 38 | 39 | ## Credit: 40 | 41 | This issue was discovered by Jason Villaluna. 42 | 43 | ## References: 44 | [Apache ORC security](/security) 45 | -------------------------------------------------------------------------------- /site/specification/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: ORC Specification 4 | --- 5 | 6 | There have been two released ORC file versions: 7 | 8 | * [ORC v0](ORCv0) was released in Hive 0.11. 9 | * [ORC v1](ORCv1) was released in Hive 0.12 and ORC 1.x. 10 | 11 | Each version of the library will detect the format version and use 12 | the appropriate reader. 
The library can also write the older versions 13 | of the file format to ensure that users can write files that all of their 14 | clusters can read correctly. 15 | 16 | We are working on a new version of the file format: 17 | 18 | * [ORC v2](ORCv2) is a work in progress and is rapidly evolving. 19 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # Licensed under the Apache License, Version 2.0 (the "License"); 19 | # you may not use this file except in compliance with the License. 20 | # You may obtain a copy of the License at 21 | # 22 | # http://www.apache.org/licenses/LICENSE-2.0 23 | # 24 | # Unless required by applicable law or agreed to in writing, software 25 | # distributed under the License is distributed on an "AS IS" BASIS, 26 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 27 | # See the License for the specific language governing permissions and 28 | # limitations under the License. 
29 | 30 | add_subdirectory(src) 31 | 32 | if (BUILD_CPP_TESTS) 33 | add_subdirectory(test) 34 | endif () 35 | -------------------------------------------------------------------------------- /tools/src/TimezoneDump.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #include "orc/Exceptions.hh" 20 | 21 | #include "Timezone.hh" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | void printFile(const std::string& name) { 28 | std::cout << "Timezone " << name << ":\n"; 29 | const orc::Timezone& tz = orc::getTimezoneByName(name); 30 | tz.print(std::cout); 31 | } 32 | 33 | int main(int argc, char* argv[]) { 34 | if (argc < 2) { 35 | std::cout << "Usage: timezone-dump\n"; 36 | } 37 | for (int o = 1; o < argc; ++o) { 38 | printFile(argv[o]); 39 | } 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /tools/src/ToolsHelper.hh: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #include 20 | #include "orc/ColumnPrinter.hh" 21 | #include "orc/Reader.hh" 22 | 23 | void printOptions(std::ostream& out); 24 | 25 | bool parseOptions(int* argc, char** argv[], uint64_t* batchSize, 26 | orc::RowReaderOptions* rowReaderOpts, bool* showMetrics); 27 | 28 | void printReaderMetrics(std::ostream& out, const orc::ReaderMetrics* metrics); 29 | --------------------------------------------------------------------------------