├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── hive-dwrf-shims ├── pom.xml └── src │ └── main │ └── java │ └── org │ └── apache │ └── hadoop │ └── hive │ ├── common │ └── io │ │ └── RawDatasizeConst.java │ ├── ql │ └── io │ │ └── slice │ │ ├── BasicSliceInput.java │ │ ├── BasicSliceOutput.java │ │ ├── InputStreamSliceInput.java │ │ ├── OutputStreamSliceOutput.java │ │ ├── RuntimeIOException.java │ │ ├── SizeOf.java │ │ ├── Slice.java │ │ ├── SliceInput.java │ │ ├── SliceOutput.java │ │ └── Slices.java │ └── serde2 │ ├── ReaderWriterProfiler.java │ └── objectinspector │ └── FBSettableUnionObjectInspector.java ├── hive-dwrf ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── facebook │ │ │ └── hive │ │ │ └── orc │ │ │ ├── BitFieldReader.java │ │ │ ├── BitFieldWriter.java │ │ │ ├── DictionaryEncoder.java │ │ │ ├── DynamicArray.java │ │ │ ├── DynamicByteArray.java │ │ │ ├── DynamicIntArray.java │ │ │ ├── DynamicLongArray.java │ │ │ ├── FileDump.java │ │ │ ├── InStream.java │ │ │ ├── IntDictionaryEncoder.java │ │ │ ├── MemoryEstimate.java │ │ │ ├── MemoryManager.java │ │ │ ├── NotAnORCFileException.java │ │ │ ├── OrcConf.java │ │ │ ├── OrcFile.java │ │ │ ├── OrcInputFormat.java │ │ │ ├── OrcOutputFormat.java │ │ │ ├── OrcSerde.java │ │ │ ├── OrcStruct.java │ │ │ ├── OrcUnion.java │ │ │ ├── OutStream.java │ │ │ ├── PositionRecorder.java │ │ │ ├── PositionedOutputStream.java │ │ │ ├── Reader.java │ │ │ ├── ReaderImpl.java │ │ │ ├── RecordReader.java │ │ │ ├── RecordReaderImpl.java │ │ │ ├── RunLengthByteReader.java │ │ │ ├── RunLengthByteWriter.java │ │ │ ├── RunLengthConstants.java │ │ │ ├── RunLengthIntegerReader.java │ │ │ ├── RunLengthIntegerWriter.java │ │ │ ├── SerializationUtils.java │ │ │ ├── StreamName.java │ │ │ ├── StringDictionaryEncoder.java │ │ │ ├── StripeInformation.java │ │ │ ├── StripeReader.java │ │ │ ├── Writer.java │ │ │ ├── WriterImpl.java │ │ │ ├── compression │ │ │ ├── CompressionCodec.java │ │ │ ├── 
CompressionKind.java │ │ │ ├── SnappyCodec.java │ │ │ └── ZlibCodec.java │ │ │ ├── lazy │ │ │ ├── LazyBinaryTreeReader.java │ │ │ ├── LazyBooleanTreeReader.java │ │ │ ├── LazyByteTreeReader.java │ │ │ ├── LazyDoubleTreeReader.java │ │ │ ├── LazyFloatTreeReader.java │ │ │ ├── LazyIntDictionaryTreeReader.java │ │ │ ├── LazyIntDirectTreeReader.java │ │ │ ├── LazyIntTreeReader.java │ │ │ ├── LazyListTreeReader.java │ │ │ ├── LazyLongDictionaryTreeReader.java │ │ │ ├── LazyLongDirectTreeReader.java │ │ │ ├── LazyLongTreeReader.java │ │ │ ├── LazyMapTreeReader.java │ │ │ ├── LazyNumericDictionaryTreeReader.java │ │ │ ├── LazyNumericDirectTreeReader.java │ │ │ ├── LazyShortDictionaryTreeReader.java │ │ │ ├── LazyShortDirectTreeReader.java │ │ │ ├── LazyShortTreeReader.java │ │ │ ├── LazyStringDictionaryTreeReader.java │ │ │ ├── LazyStringDirectTreeReader.java │ │ │ ├── LazyStringTreeReader.java │ │ │ ├── LazyStructTreeReader.java │ │ │ ├── LazyTimestampTreeReader.java │ │ │ ├── LazyTreeReader.java │ │ │ ├── LazyUnionTreeReader.java │ │ │ ├── OrcLazyBinary.java │ │ │ ├── OrcLazyBinaryObjectInspector.java │ │ │ ├── OrcLazyBoolean.java │ │ │ ├── OrcLazyBooleanObjectInspector.java │ │ │ ├── OrcLazyByte.java │ │ │ ├── OrcLazyByteObjectInspector.java │ │ │ ├── OrcLazyDouble.java │ │ │ ├── OrcLazyDoubleObjectInspector.java │ │ │ ├── OrcLazyFloat.java │ │ │ ├── OrcLazyFloatObjectInspector.java │ │ │ ├── OrcLazyInt.java │ │ │ ├── OrcLazyIntObjectInspector.java │ │ │ ├── OrcLazyList.java │ │ │ ├── OrcLazyListObjectInspector.java │ │ │ ├── OrcLazyLong.java │ │ │ ├── OrcLazyLongObjectInspector.java │ │ │ ├── OrcLazyMap.java │ │ │ ├── OrcLazyMapObjectInspector.java │ │ │ ├── OrcLazyObject.java │ │ │ ├── OrcLazyObjectInspectorUtils.java │ │ │ ├── OrcLazyPrimitiveObjectInspector.java │ │ │ ├── OrcLazyRow.java │ │ │ ├── OrcLazyRowObjectInspector.java │ │ │ ├── OrcLazyShort.java │ │ │ ├── OrcLazyShortObjectInspector.java │ │ │ ├── OrcLazyString.java │ │ │ ├── 
OrcLazyStringObjectInspector.java │ │ │ ├── OrcLazyStruct.java │ │ │ ├── OrcLazyStructObjectInspector.java │ │ │ ├── OrcLazyTimestamp.java │ │ │ ├── OrcLazyTimestampObjectInspector.java │ │ │ ├── OrcLazyUnion.java │ │ │ └── OrcLazyUnionObjectInspector.java │ │ │ ├── package-info.java │ │ │ └── statistics │ │ │ ├── BooleanColumnStatistics.java │ │ │ ├── ColumnStatistics.java │ │ │ ├── ColumnStatisticsImpl.java │ │ │ ├── DoubleColumnStatistics.java │ │ │ ├── IntegerColumnStatistics.java │ │ │ └── StringColumnStatistics.java │ └── resources │ │ └── protobuf │ │ └── com │ │ └── facebook │ │ └── hive │ │ └── orc │ │ └── orc_proto.proto │ └── test │ ├── java │ └── com │ │ └── facebook │ │ └── hive │ │ └── orc │ │ ├── MemoryManagerWithForce.java │ │ ├── OrcTestUtils.java │ │ ├── TestBitFieldReader.java │ │ ├── TestColumnStatistics.java │ │ ├── TestDynamicArray.java │ │ ├── TestFileDump.java │ │ ├── TestInStream.java │ │ ├── TestInputOutputFormat.java │ │ ├── TestIntDictionaryEncoder.java │ │ ├── TestMapTreeWriter.java │ │ ├── TestMemoryManager.java │ │ ├── TestObjectInspector.java │ │ ├── TestOrcFile.java │ │ ├── TestOrcStruct.java │ │ ├── TestRecordReaderImpl.java │ │ ├── TestRunLengthByteReader.java │ │ ├── TestRunLengthIntegerReader.java │ │ ├── TestSerializationUtils.java │ │ ├── TestStreamName.java │ │ ├── TestStringDictionaryEncoder.java │ │ ├── TestZlib.java │ │ └── WriterImplWithForceFlush.java │ └── resources │ ├── orc-file-dump-dictionary-threshold-unsorted.out │ ├── orc-file-dump-dictionary-threshold-unsorted2.out │ ├── orc-file-dump-dictionary-threshold.out │ ├── orc-file-dump-entropy-threshold.out │ └── orc-file-dump.out └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | *.ipr 3 | *.iws 4 | target/ 5 | /var 6 | /*/var/ 7 | pom.xml.versionsBackup 8 | test-output/ 9 | /atlassian-ide-plugin.xml 10 | .idea 11 | .DS_Store 12 | .classpath 13 | .settings 14 | .project 15 | 
temp-testng-customsuite.xml 16 | test-output 17 | .externalToolBuilders 18 | *~ 19 | 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to DWRF 2 | 3 | We are currently not accepting pull requests. DWRF is a fork of the ORC file format, which is part of the Apache Hive project; please submit your contributions there. 4 | 5 | http://hive.apache.org 6 | 7 | https://issues.apache.org/jira/browse/HIVE 8 | 9 | https://cwiki.apache.org/confluence/display/Hive/LanguageManual+ORC 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | - - - 2 | 3 | **_This project is not actively maintained. Proceed at your own risk!_** 4 | 5 | - - - 6 | 7 | hive-dwrf 8 | ========= 9 | 10 | DWRF file format for Hive 11 | 12 | 13 | This is a fork of the ORC file format developed as part of the Apache Hive project: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+ORC 14 | 15 | We are not currently accepting pull requests; please submit your changes to the Apache Hive project. 
16 | -------------------------------------------------------------------------------- /hive-dwrf-shims/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 4.0.0 19 | 20 | 21 | com.facebook.hive 22 | hive-dwrf-root 23 | 0.18.10-SNAPSHOT 24 | 25 | 26 | hive-dwrf-shims 27 | 28 | 29 | ${project.parent.basedir} 30 | 31 | 32 | 33 | 34 | com.facebook.presto.hive 35 | hive-apache 36 | true 37 | 38 | 39 | 40 | com.facebook.presto.hadoop 41 | hadoop-cdh4 42 | true 43 | 44 | 45 | 46 | com.google.guava 47 | guava 48 | 49 | 50 | 51 | commons-logging 52 | commons-logging 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /hive-dwrf-shims/src/main/java/org/apache/hadoop/hive/common/io/RawDatasizeConst.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package org.apache.hadoop.hive.common.io; 22 | 23 | /** 24 | * A class that defines the constant strings used by the raw datasize calculation. 
25 | * 26 | * It's shared among RC file format and ORC file format 27 | */ 28 | public final class RawDatasizeConst { 29 | 30 | public final static short NULL_SIZE = 1; 31 | 32 | public final static short BOOLEAN_SIZE = 1; 33 | 34 | public final static short BYTE_SIZE = 1; 35 | 36 | public final static short SHORT_SIZE = 2; 37 | 38 | public final static short INT_SIZE = 4; 39 | 40 | public final static short LONG_SIZE = 8; 41 | 42 | public final static short FLOAT_SIZE = 4; 43 | 44 | public final static short DOUBLE_SIZE = 8; 45 | 46 | /** 47 | * Raw data size is: 48 | * the number of bytes needed to store the milliseconds since the epoch 49 | * (8 since it's a long) 50 | * + 51 | * the number of bytes needed to store the nanos field (4 since it's an int) 52 | */ 53 | public final static short TIMESTAMP_SIZE = 12; 54 | 55 | /** 56 | * UNION raw data size is size of tag (1) + size of value 57 | */ 58 | public final static short UNION_TAG_SIZE = 1; 59 | 60 | /** 61 | * For string type: Raw data size is length - no. of bytes 62 | * 63 | * For binary type: Raw data size is the length of the BytesWritable, i.e. the number of bytes 64 | */ 65 | 66 | /** 67 | * ORC calculates decimal in HIVE-4120 in the following way: 68 | * 69 | * @Override 70 | * void write(Object obj) throws IOException { 71 | * super.write(obj); 72 | * if (obj != null) { 73 | * BigDecimal decimal = ((BigDecimalObjectInspector) inspector). 74 | * getPrimitiveJavaObject(obj); 75 | * SerializationUtils.writeBigInteger(valueStream, 76 | * decimal.unscaledValue()); 77 | * scaleStream.write(decimal.scale()); 78 | * indexStatistics.updateDecimal(decimal); 79 | * } 80 | * } 81 | * 82 | * we can simulate it while calculating rawdatasize: 83 | * 4 + ceiling(log2(n)/8.0) bytes 84 | * 1. scale - int - 4 bytes 85 | * 2. 
BigInteger size: ceiling(log2(n)/8.0) bytes 86 | */ 87 | public final static short BIGDECIMAL_SCALE_SIZE = 4; 88 | 89 | } 90 | -------------------------------------------------------------------------------- /hive-dwrf-shims/src/main/java/org/apache/hadoop/hive/ql/io/slice/RuntimeIOException.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package org.apache.hadoop.hive.ql.io.slice; 22 | 23 | import java.io.IOException; 24 | 25 | public class RuntimeIOException 26 | extends RuntimeException 27 | { 28 | public RuntimeIOException(IOException cause) 29 | { 30 | super(cause); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /hive-dwrf-shims/src/main/java/org/apache/hadoop/hive/serde2/objectinspector/FBSettableUnionObjectInspector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hive.serde2.objectinspector; 19 | 20 | /** 21 | * A UnionObjectInspector that also declares operations for creating a union 22 | * object and for setting its tag and field. 23 | */ 24 | public interface FBSettableUnionObjectInspector extends UnionObjectInspector { 25 | 26 | /** 27 | * Create a union object with tag 0 and a null object. 28 | */ 29 | public abstract Object create(); 30 | 31 | /** 32 | * Update the given union with the given tag and field object. 33 | */ 34 | public abstract void setField(Object union, byte tag, Object field); 35 | } 36 | -------------------------------------------------------------------------------- /hive-dwrf/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 4.0.0 19 | 20 | 21 | com.facebook.hive 22 | hive-dwrf-root 23 | 0.18.10-SNAPSHOT 24 | 25 | 26 | hive-dwrf 27 | 28 | 29 | ${project.parent.basedir} 30 | 31 | 32 | 33 | 34 | com.facebook.hive 35 | hive-dwrf-shims 36 | 37 | 38 | 39 | com.facebook.presto.hadoop 40 | hadoop-cdh4 41 | true 42 | 43 | 44 | 45 | com.facebook.presto.hive 46 | hive-apache 47 | true 48 | 49 | 50 | 51 | com.google.protobuf 52 | protobuf-java 53 | 54 | 55 | 56 | commons-logging 57 | commons-logging 58 | 59 | 60 | 61 | it.unimi.dsi 62 | fastutil 63 | 64 | 65 | 66 | org.iq80.snappy 67 | snappy 68 
| 69 | 70 | 71 | com.google.guava 72 | guava 73 | 74 | 75 | 76 | 77 | junit 78 | junit-dep 79 | test 80 | 81 | 82 | 83 | org.mockito 84 | mockito-core 85 | test 86 | 87 | 88 | 89 | org.hamcrest 90 | hamcrest-core 91 | test 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | maven-antrun-plugin 100 | 101 | 102 | generate-sources 103 | generate-sources 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | target/generated-sources/protobuf/gen-java 114 | 115 | 116 | run 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/BitFieldReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | package com.facebook.hive.orc; 21 | 22 | import java.io.EOFException; 23 | import java.io.IOException; 24 | import java.util.List; 25 | 26 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 27 |
/**
 * Reads single bits out of the bytes produced by a {@link RunLengthByteReader}.
 * Within each byte the bits are handed out from the most significant bit
 * down to the least significant one.
 */
public class BitFieldReader {
  private final RunLengthByteReader input;
  // The byte currently being consumed, kept as an unsigned value (0..255).
  private int current;
  // How many low-order bits of 'current' have not been returned yet.
  private int bitsLeft;
  // For each index entry, the number of bits already consumed from the byte at
  // that entry's position; seek() rejects values above 8. Filled in by
  // loadIndeces().
  private int[] indeces;

  /**
   * Creates a reader that decodes bits from the given stream.
   *
   * @param input the stream handed to the underlying RunLengthByteReader
   * @throws IOException declared for the underlying reader's construction
   */
  public BitFieldReader(InStream input) throws IOException {
    this.input = new RunLengthByteReader(input);
  }

  /**
   * Loads the next byte from the underlying reader and marks all 8 of its
   * bits as unread.
   *
   * @throws EOFException if the underlying reader has no more bytes
   */
  private void readByte() throws IOException {
    if (input.hasNext()) {
      current = 0xff & input.next();
      bitsLeft = 8;
    } else {
      throw new EOFException("Read past end of bit field from " + input);
    }
  }

  /**
   * Returns the next bit of the stream.
   *
   * @return 0 or 1
   * @throws IOException if a new byte is needed and cannot be read
   */
  public int next() throws IOException {
    if (bitsLeft == 0) {
      readByte();
    }

    bitsLeft--;
    return (current >>> bitsLeft) & 1;
  }

  /**
   * Positions the reader at the given index entry. The underlying byte reader
   * is moved first; then the number of bits recorded for that entry is
   * treated as already consumed from the byte found there.
   *
   * @param index the index entry to seek to; loadIndeces must have been
   *              called beforehand so that the per-entry bit counts exist
   * @throws IllegalArgumentException if the recorded bit count exceeds 8
   * @throws IOException if the underlying reader fails
   */
  public void seek(int index) throws IOException {
    input.seek(index);
    int consumed = indeces[index];
    if (consumed > 8) {
      throw new IllegalArgumentException("Seek past end of byte at " +
          consumed + " in " + input);
    } else if (consumed != 0) {
      readByte();
      bitsLeft = 8 - consumed;
    } else {
      // Nothing consumed yet: the byte is loaded lazily by the next read.
      bitsLeft = 0;
    }
  }

  /**
   * Lets the underlying byte reader load its own positions from the row index
   * entries, then stores, for every entry, the position value that follows
   * them (the number of bits consumed within the byte).
   *
   * @param rowIndexEntries the row index entries to read positions from
   * @param startIndex the first position index belonging to this stream
   * @return the position index at which the next stream's positions start
   */
  public int loadIndeces(List<RowIndexEntry> rowIndexEntries, int startIndex) {
    int updatedStartIndex = input.loadIndeces(rowIndexEntries, startIndex);

    int numIndeces = rowIndexEntries.size();
    // NOTE(review): one slot more than there are entries is allocated; the
    // extra slot is never written here - confirm whether callers rely on it.
    indeces = new int[numIndeces + 1];
    int i = 0;
    for (RowIndexEntry rowIndexEntry : rowIndexEntries) {
      indeces[i] = (int) rowIndexEntry.getPositions(updatedStartIndex);
      i++;
    }
    return updatedStartIndex + 1;
  }

  /**
   * Skips the given number of bits.
   *
   * @param items the number of bits to skip
   * @throws IOException if the underlying reader fails
   */
  public void skip(long items) throws IOException {
    long totalBits = items;
    if (bitsLeft >= totalBits) {
      bitsLeft -= totalBits;
    } else {
      totalBits -= bitsLeft;
      input.skip(totalBits / 8);
      bitsLeft = (int) (8 - (totalBits % 8));

      // Load the next value only if the stream still has data. If not,
      // then mark bitsLeft as zero to force exception when values are
      // attempted to be read.
      if (input.hasNext()) {
        // Masked like readByte() so 'current' always holds an unsigned byte.
        current = 0xff & input.next();
      } else {
        bitsLeft = 0;
      }
    }
  }

  /**
   * Closes the underlying byte reader.
   *
   * @throws IOException if closing fails
   */
  public void close() throws IOException {
    input.close();
  }
}
-------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/BitFieldWriter.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License.
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc; 21 | 22 | import java.io.IOException; 23 |
/**
 * Packs fixed-width values into bytes, most significant bit first, and hands
 * each completed byte to a {@link RunLengthByteWriter}.
 */
class BitFieldWriter {
  private RunLengthByteWriter output;
  // Width, in bits, of every value passed to write().
  private final int bitSize;
  // The byte being assembled.
  private byte current = 0;
  // Number of bits of 'current' that are still free.
  private int bitsLeft = 8;

  /**
   * @param output the stream wrapped by the underlying RunLengthByteWriter
   * @param bitSize the number of bits written per value
   * @throws IOException declared for the underlying writer's construction
   */
  BitFieldWriter(PositionedOutputStream output,
                 int bitSize) throws IOException {
    this.output = new RunLengthByteWriter(output);
    this.bitSize = bitSize;
  }

  /** Emits the byte under assembly and starts a fresh, empty one. */
  private void writeByte() throws IOException {
    output.write(current);
    current = 0;
    bitsLeft = 8;
  }

  /**
   * Emits a partially filled byte, if there is one, and flushes the
   * underlying writer.
   */
  void flush() throws IOException {
    final boolean hasPendingBits = bitsLeft != 8;
    if (hasPendingBits) {
      writeByte();
    }
    output.flush();
  }

  /**
   * Appends the low bitSize bits of the given value.
   *
   * @param value the value to append
   * @throws IOException if a completed byte cannot be written
   */
  void write(int value) throws IOException {
    int pending = value;
    int remaining = bitSize;

    // While the value does not fit in the free part of the current byte,
    // fill that byte with the value's top bits and emit it.
    while (remaining > bitsLeft) {
      final int overflowBits = remaining - bitsLeft;
      current |= pending >>> overflowBits;
      remaining = overflowBits;
      // keep only the bits that have not been written yet
      pending &= (1 << remaining) - 1;
      writeByte();
    }

    // What is left fits: place it just below the bits already in the byte.
    bitsLeft -= remaining;
    current |= pending << bitsLeft;
    if (bitsLeft == 0) {
      writeByte();
    }
  }

  /**
   * Records the underlying writer's position followed by the number of bits
   * already used in the byte under assembly.
   */
  void getPosition(PositionRecorder recorder) throws IOException {
    output.getPosition(recorder);
    final int usedBits = 8 - bitsLeft;
    recorder.addPosition(usedBits);
  }
}
-------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/DictionaryEncoder.java:
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc; 22 | 23 | import java.io.IOException; 24 | import java.io.OutputStream; 25 |
/**
 * Common base for dictionary encoders. This class only stores the
 * configuration shared by its subclasses and declares the operations they
 * must provide; the child class handles the keys and the comparisons with
 * them.
 *
 * NOTE(review): the previous comment described this class as a memory
 * efficient red-black tree that allocates no objects per element. Nothing in
 * this class implements a tree, so that text presumably described a subclass
 * or an earlier version - confirm before relying on it.
 */
abstract class DictionaryEncoder {
  // Whether the keys are to be kept sorted; interpreted by subclasses.
  protected final boolean sortKeys;
  // Memory accounting object shared with subclasses.
  protected final MemoryEstimate memoryEstimate;

  /**
   * @param sortKeys stored as-is for use by subclasses
   * @param memoryEstimate stored as-is for use by subclasses
   */
  public DictionaryEncoder(boolean sortKeys, MemoryEstimate memoryEstimate) {
    this.sortKeys = sortKeys;
    this.memoryEstimate = memoryEstimate;
  }

  /**
   * Compare the value at the given position to the new value.
   * @return 0 if the values are the same, -1 if the new value is smaller and
   *         1 if the new value is larger.
   */
  protected abstract int compareValue(int position);

  /**
   * Get the number of elements in the set.
   */
  public abstract int size();

  /**
   * Reset the table to empty.
   */
  public abstract void clear();

  /**
   * Returns the amount of memory used by entries in the dictionary.
   */
  public abstract int getUncompressedLength();

  /**
   * The interface for visitors.
   *
   * @param <T> the key type exposed by the visited entries
   */
  public interface Visitor<T> {
    /**
     * Called once for each node of the tree in sort order.
     * @param context the information about each node
     * @throws IOException
     */
    void visit(VisitorContext<T> context) throws IOException;
  }

  /**
   * The information about each node.
   *
   * @param <T> the key type returned by {@link #getKey()}
   */
  public interface VisitorContext<T> {
    /**
     * Get the position where the key was originally added.
     * @return the number returned by add.
     */
    int getOriginalPosition();

    /**
     * Write the bytes for the string to the given output stream.
     * @param out the stream to write to.
     * @throws IOException
     */
    void writeBytes(OutputStream out) throws IOException;

    /**
     * Get the number of bytes.
     * @return the string's length in bytes
     */
    int getLength();

    /**
     * Get the key of this entry.
     * @return the key
     */
    T getKey();

    /**
     * Get the number of times this key was added to the dictionary.
     * @return the number of times this key was added to the dictionary
     */
    int getCount();

    /**
     * Get the first index stride when this key was added to the dictionary.
     * @return the first index stride when this key was added to the dictionary
     */
    int getIndexStride();
  }
}

-------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/DynamicArray.java: -------------------------------------------------------------------------------- 1 | package com.facebook.hive.orc; 2 | 3 | import org.apache.hadoop.hive.ql.io.slice.SizeOf; 4 | import org.apache.hadoop.hive.ql.io.slice.Slice; 5
| import org.apache.hadoop.hive.ql.io.slice.Slices; 6 |
/**
 * Base class for growable arrays of fixed-width elements stored in a
 * {@link Slice}. Every change of the backing slice is reported to a
 * {@link MemoryEstimate}.
 */
public class DynamicArray {

  // Backing storage for the elements.
  protected Slice data;
  // One more than the highest element index that has been set.
  protected int length = 0;
  private final MemoryEstimate memoryEstimate;
  // Width of one element, in bytes.
  private final int literalSize;
  // Byte size used when the array is cleared and as headroom when it grows.
  private final int defaultSize;

  /**
   * @param size initial size of the backing slice, in bytes; must be positive
   * @param memoryEstimate receives the memory accounting updates
   * @param literalSize width of one element, in bytes
   * @param defaultSize byte size used by clear() and as headroom by grow()
   * @throws IllegalArgumentException if size is not positive
   */
  protected DynamicArray(int size, MemoryEstimate memoryEstimate, int literalSize,
      int defaultSize) {
    if (size <= 0) {
      throw new IllegalArgumentException("bad chunksize");
    }
    this.memoryEstimate = memoryEstimate;
    this.literalSize = literalSize;
    this.defaultSize = defaultSize;
    setData(Slices.allocate(size));
  }

  /**
   * Swaps in a new backing slice, removing the old slice's size from the
   * memory estimate and adding the new one's.
   */
  protected void setData(Slice newData) {
    final int releasedBytes;
    if (data == null) {
      releasedBytes = 0;
    } else {
      releasedBytes = data.length();
    }
    memoryEstimate.decrementTotalMemory(releasedBytes);

    data = newData;
    memoryEstimate.incrementTotalMemory(data.length());
  }

  /**
   * Makes sure the element at the given index fits into the backing slice,
   * replacing the slice with a larger copy when it does not.
   */
  protected void grow(int index) {
    final int lastByteOfElement = (index * literalSize) + (literalSize - 1);
    if (lastByteOfElement < data.length()) {
      return;
    }

    // At least double the storage, or go far enough to hold the requested
    // element plus the default headroom, whichever is larger.
    final int withHeadroom = (index * literalSize) + defaultSize;
    final int doubled = 2 * data.length();
    Slice enlarged = Slices.allocate(Math.max(withHeadroom, doubled));
    enlarged.setBytes(0, data);
    setData(enlarged);
  }

  /**
   * Get the size of the array.
   * @return the number of elements, i.e. the highest set index plus one
   */
  public int size() {
    return length;
  }

  /**
   * Empties the array and replaces the backing slice with a fresh one of the
   * default size.
   */
  public void clear() {
    length = 0;
    setData(Slices.allocate(defaultSize));
  }

  /**
   * @return the size of the backing slice, in bytes
   */
  public long getSizeInBytes() {
    return data.length();
  }

  // A cleanup method that should be called before allowing the object to leave
  // scope: it removes the backing slice's size from the memory estimate.
  public void cleanup() {
    memoryEstimate.decrementTotalMemory(data.length());
  }
}
-------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/DynamicIntArray.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc; 21 | 22 | import org.apache.hadoop.hive.ql.io.slice.SizeOf; 23 | import org.apache.hadoop.hive.ql.io.slice.Slice; 24 | import org.apache.hadoop.hive.ql.io.slice.Slices; 25 | 26 | /** 27 | * Dynamic int array that uses primitive types and chunks to avoid copying 28 | * large number of integers when it resizes. 29 | * 30 | * The motivation for this class is memory optimization, i.e.
space efficient 31 | * storage of potentially huge arrays without good a-priori size guesses. 32 | * 33 | * The API of this class is between a primitive array and a AbstractList. It's 34 | * not a Collection implementation because it handles primitive types, but the 35 | * API could be extended to support iterators and the like. 36 | * 37 | * NOTE: Like standard Collection implementations/arrays, this class is not 38 | * synchronized. 39 | */ 40 | final class DynamicIntArray extends DynamicArray { 41 | static final int DEFAULT_SIZE = SizeOf.SIZE_OF_INT * 8 * 1024; 42 | 43 | public DynamicIntArray(MemoryEstimate memoryEstimate) { 44 | this(DEFAULT_SIZE, memoryEstimate); 45 | } 46 | 47 | public DynamicIntArray(int size, MemoryEstimate memoryEstimate) { 48 | super(size, memoryEstimate, SizeOf.SIZE_OF_INT, DEFAULT_SIZE); 49 | } 50 | 51 | public int get(int index) { 52 | if (index >= length) { 53 | throw new IndexOutOfBoundsException("Index " + index + 54 | " is outside of 0.." + 55 | (length - 1)); 56 | } 57 | 58 | return data.getInt(index * SizeOf.SIZE_OF_INT); 59 | } 60 | 61 | public void set(int index, int value) { 62 | grow(index); 63 | if (index >= length) { 64 | length = index + 1; 65 | } 66 | 67 | data.setInt(index * SizeOf.SIZE_OF_INT, value); 68 | } 69 | 70 | public void increment(int index, int value) { 71 | grow(index); 72 | if (index >= length) { 73 | length = index + 1; 74 | } 75 | data.setInt(index * SizeOf.SIZE_OF_INT, data.getInt(index * SizeOf.SIZE_OF_INT) + value); 76 | } 77 | 78 | public void add(int value) { 79 | grow(length); 80 | data.setInt(length * SizeOf.SIZE_OF_INT, value); 81 | length += 1; 82 | } 83 | 84 | @Override 85 | public String toString() { 86 | int i; 87 | StringBuilder sb = new StringBuilder(length * 4); 88 | 89 | sb.append('{'); 90 | int l = length - 1; 91 | for (i=0; i= length) { 53 | throw new IndexOutOfBoundsException("Index " + index + 54 | " is outside of 0.." 
+ 55 | (length - 1)); 56 | } 57 | 58 | return data.getLong(index * SizeOf.SIZE_OF_LONG); 59 | } 60 | 61 | public void set(int index, long value) { 62 | grow(index); 63 | if (index >= length) { 64 | length = index + 1; 65 | } 66 | 67 | data.setLong(index * SizeOf.SIZE_OF_LONG, value); 68 | } 69 | 70 | public void increment(int index, long value) { 71 | grow(index); 72 | if (index >= length) { 73 | length = index + 1; 74 | } 75 | 76 | data.setLong(index * SizeOf.SIZE_OF_LONG, data.getLong(index * SizeOf.SIZE_OF_LONG) + value); 77 | } 78 | 79 | public void add(long value) { 80 | grow(length); 81 | data.setLong(length * SizeOf.SIZE_OF_LONG, value); 82 | length += 1; 83 | } 84 | 85 | @Override 86 | public String toString() { 87 | int i; 88 | StringBuilder sb = new StringBuilder(length * 4); 89 | 90 | sb.append('{'); 91 | int l = length - 1; 92 | for (i=0; i { 29 | private final int column; 30 | private final OrcProto.Stream.Kind kind; 31 | 32 | public static enum Area { 33 | DATA, DICTIONARY, INDEX 34 | } 35 | 36 | public StreamName(int column, OrcProto.Stream.Kind kind) { 37 | this.column = column; 38 | this.kind = kind; 39 | } 40 | 41 | @Override 42 | public boolean equals(Object obj) { 43 | if (obj != null && obj instanceof StreamName) { 44 | StreamName other = (StreamName) obj; 45 | return other.column == column && other.kind == kind; 46 | } else { 47 | return false; 48 | } 49 | } 50 | 51 | @Override 52 | public int compareTo(StreamName streamName) { 53 | if (streamName == null) { 54 | return -1; 55 | } 56 | Area area = getArea(kind); 57 | Area otherArea = streamName.getArea(streamName.kind); 58 | if (area != otherArea) { 59 | return -area.compareTo(otherArea); 60 | } 61 | if (column != streamName.column) { 62 | return column < streamName.column ? 
-1 : 1; 63 | } 64 | return compareKinds(kind, streamName.kind); 65 | } 66 | 67 | // LENGTH is greater than DATA at the moment, but when we read the data we always read length 68 | // first (because you have to know how much data to read). Since this is an enum, we're kind of 69 | // stuck with it, this is just a hack to work around that. 70 | private int compareKinds(Kind kind1, Kind kind2) { 71 | if (kind1 == Kind.LENGTH && kind2 == Kind.DATA) { 72 | return -1; 73 | } 74 | 75 | if (kind1 == Kind.DATA && kind2 == Kind.LENGTH) { 76 | return 1; 77 | } 78 | 79 | return kind1.compareTo(kind2); 80 | } 81 | 82 | public int getColumn() { 83 | return column; 84 | } 85 | 86 | public OrcProto.Stream.Kind getKind() { 87 | return kind; 88 | } 89 | 90 | public Area getArea() { 91 | return getArea(kind); 92 | } 93 | 94 | public static Area getArea(OrcProto.Stream.Kind kind) { 95 | switch (kind) { 96 | case ROW_INDEX: 97 | case DICTIONARY_COUNT: 98 | return Area.INDEX; 99 | case DICTIONARY_DATA: 100 | return Area.DICTIONARY; 101 | default: 102 | return Area.DATA; 103 | } 104 | } 105 | 106 | @Override 107 | public String toString() { 108 | return "Stream for column " + column + " kind " + kind; 109 | } 110 | 111 | @Override 112 | public int hashCode() { 113 | return column * 101 + kind.getNumber(); 114 | } 115 | } 116 | 117 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/StripeInformation.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. 
/**
 * Describes one stripe of an ORC file; instances are provided by the Reader.
 */
public interface StripeInformation {
  /**
   * Returns where the stripe begins.
   * @return the stripe's starting position, in bytes from the start of the file
   */
  long getOffset();

  /**
   * Returns the size of the stripe's index section.
   * @return the number of bytes in the index
   */
  long getIndexLength();

  /**
   * Returns the size of the stripe's data section.
   * @return the number of bytes of data in the stripe
   */
  long getDataLength();

  /**
   * Returns the size of the stripe's tail (footer) section.
   * @return the number of bytes in the tail
   */
  long getFooterLength();

  /**
   * Returns how many rows the stripe holds.
   * @return the row count of the stripe
   */
  long getNumberOfRows();

  /**
   * Returns the raw size of the data held in the stripe.
   * @return the number of bytes of raw data
   */
  long getRawDataSize();
}
All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc; 21 | 22 | import java.io.IOException; 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | 26 | import org.apache.hadoop.fs.FSDataInputStream; 27 | import org.apache.hadoop.fs.FileSystem; 28 | import org.apache.hadoop.fs.Path; 29 | import com.facebook.hive.orc.OrcFile.KeyWrapper; 30 | import com.facebook.hive.orc.OrcFile.ValueWrapper; 31 | 32 | public class StripeReader { 33 | private final FSDataInputStream file; 34 | private final List stripes = 35 | new ArrayList(); 36 | private StripeInformation currentStripe; 37 | private byte[] currentData; 38 | private int stripesRead = 0; 39 | 40 | StripeReader(Iterable stripes, 41 | FileSystem fileSystem, 42 | Path path, 43 | long offset, long length 44 | ) throws IOException { 45 | this.file = fileSystem.open(path); 46 | for(StripeInformation stripe: stripes) { 47 | long stripeStart = stripe.getOffset(); 48 | if (stripeStart >= offset && stripeStart < offset + length) { 49 | this.stripes.add(stripe); 50 | } 51 | } 52 | } 53 | 54 | private void readStripe() throws IOException { 55 | currentStripe = 
stripes.get(stripesRead); 56 | currentData = new byte[(int) (currentStripe.getIndexLength() + currentStripe.getDataLength() + 57 | currentStripe.getFooterLength())]; 58 | file.seek(currentStripe.getOffset()); 59 | file.readFully(currentData, 0, currentData.length); 60 | } 61 | 62 | public boolean hasNext() throws IOException { 63 | return stripesRead < stripes.size(); 64 | } 65 | 66 | public boolean nextStripe(KeyWrapper keyWrapper, ValueWrapper valueWrapper) throws IOException { 67 | if (hasNext()) { 68 | readStripe(); 69 | keyWrapper.key = currentStripe; 70 | valueWrapper.value = currentData; 71 | stripesRead += 1; 72 | } 73 | return hasNext(); 74 | } 75 | 76 | public void close() throws IOException { 77 | file.close(); 78 | } 79 | 80 | public long getPosition() throws IOException { 81 | return file.getPos(); 82 | } 83 | 84 | /** 85 | * Return the fraction of stripes that have been read from the selected. 86 | * section of the file 87 | * @return fraction between 0.0 and 1.0 of stripes consumed 88 | */ 89 | public float getProgress() { 90 | return ((float) stripesRead) / stripes.size(); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/Writer.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
/**
 * Contract for components that write ORC files.
 */
public interface Writer {
  /**
   * Attaches arbitrary user meta-data to the ORC file. Allowed at any time
   * before the Writer is closed. Passing a key that was used before replaces
   * the earlier value with the new one.
   * @param key the label under which the data is stored.
   * @param value the metadata contents.
   */
  void addUserMetadata(String key, ByteBuffer value);

  /**
   * Appends one row to the ORC file.
   * @param row the row to append
   * @throws IOException
   */
  void addRow(Object row) throws IOException;

  /**
   * Returns the raw data size of the most recently added row.
   */
  long getRowRawDataSize();

  /**
   * Flushes every buffer and closes the file. The writer must not be used
   * after this call.
   * @throws IOException
   */
  void close() throws IOException;
}
See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc.compression; 21 | 22 | import java.io.IOException; 23 | import java.nio.ByteBuffer; 24 | 25 | import org.apache.hadoop.conf.Configuration; 26 | 27 | public interface CompressionCodec { 28 | /** 29 | * Compress the in buffer to the out buffer. 30 | * @param in the bytes to compress 31 | * @param out the uncompressed bytes 32 | * @param overflow put any additional bytes here 33 | * @return true if the output is smaller than input 34 | * @throws IOException 35 | */ 36 | boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow 37 | ) throws IOException; 38 | 39 | /** 40 | * Decompress the in buffer to the out buffer. 41 | * @param in the bytes to decompress 42 | * @param out the decompressed bytes 43 | * @throws IOException 44 | */ 45 | void decompress(ByteBuffer in, ByteBuffer out) throws IOException; 46 | 47 | void reloadConfigurations(Configuration conf); 48 | } 49 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/compression/CompressionKind.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 
/**
 * The generic compression algorithms that can be applied to ORC files.
 * Constant order is part of the enum's identity (ordinals) and is preserved.
 */
public enum CompressionKind {
  NONE,
  ZLIB,
  SNAPPY,
  LZO
}
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.compression; 22 | 23 | import java.io.IOException; 24 | import java.nio.ByteBuffer; 25 | 26 | import com.facebook.hive.orc.compression.CompressionCodec; 27 | import org.apache.hadoop.conf.Configuration; 28 | import org.iq80.snappy.Snappy; 29 | 30 | public class SnappyCodec implements CompressionCodec { 31 | 32 | @Override 33 | public void reloadConfigurations(Configuration conf) { 34 | } 35 | 36 | @Override 37 | public boolean compress(ByteBuffer in, ByteBuffer out, 38 | ByteBuffer overflow) throws IOException { 39 | int inBytes = in.remaining(); 40 | // I should work on a patch for Snappy to support an overflow buffer 41 | // to prevent the extra buffer copy. 
42 | byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)]; 43 | int outBytes = 44 | Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes, 45 | compressed, 0); 46 | if (outBytes < inBytes) { 47 | int remaining = out.remaining(); 48 | if (remaining >= outBytes) { 49 | System.arraycopy(compressed, 0, out.array(), out.arrayOffset() + 50 | out.position(), outBytes); 51 | out.position(out.position() + outBytes); 52 | } else { 53 | System.arraycopy(compressed, 0, out.array(), out.arrayOffset() + 54 | out.position(), remaining); 55 | out.position(out.limit()); 56 | System.arraycopy(compressed, remaining, overflow.array(), 57 | overflow.arrayOffset(), outBytes - remaining); 58 | overflow.position(outBytes - remaining); 59 | } 60 | return true; 61 | } else { 62 | return false; 63 | } 64 | } 65 | 66 | @Override 67 | public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { 68 | int inOffset = in.position(); 69 | int uncompressLen = 70 | Snappy.uncompress(in.array(), in.arrayOffset() + inOffset, 71 | in.limit() - inOffset, out.array(), out.arrayOffset() + out.position()); 72 | out.position(uncompressLen + out.position()); 73 | out.flip(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/compression/ZlibCodec.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc.compression; 21 | 22 | import java.io.IOException; 23 | import java.nio.ByteBuffer; 24 | import java.util.zip.DataFormatException; 25 | import java.util.zip.Deflater; 26 | import java.util.zip.Inflater; 27 | 28 | import com.facebook.hive.orc.OrcConf; 29 | import com.facebook.hive.orc.compression.CompressionCodec; 30 | import org.apache.hadoop.conf.Configuration; 31 | 32 | public class ZlibCodec implements CompressionCodec { 33 | 34 | private int compressionLevel; 35 | 36 | public ZlibCodec() { 37 | compressionLevel = Deflater.DEFAULT_COMPRESSION; 38 | } 39 | 40 | public ZlibCodec(Configuration conf) { 41 | if (conf == null) { 42 | compressionLevel = Deflater.DEFAULT_COMPRESSION; 43 | } else { 44 | compressionLevel = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL); 45 | } 46 | } 47 | 48 | public void reloadConfigurations(Configuration conf) { 49 | compressionLevel = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL); 50 | } 51 | 52 | @Override 53 | public boolean compress(ByteBuffer in, ByteBuffer out, 54 | ByteBuffer overflow) throws IOException { 55 | Deflater deflater = new Deflater(compressionLevel, true); 56 | int length = in.remaining(); 57 | deflater.setInput(in.array(), in.arrayOffset() + in.position(), length); 58 | deflater.finish(); 59 | int outSize = 0; 60 | int offset = out.arrayOffset() + out.position(); 61 | while (!deflater.finished() && (length > outSize)) { 62 | int size = deflater.deflate(out.array(), offset, 
out.remaining()); 63 | out.position(size + out.position()); 64 | outSize += size; 65 | offset += size; 66 | // if we run out of space in the out buffer, use the overflow 67 | if (out.remaining() == 0) { 68 | if (overflow == null) { 69 | deflater.end(); 70 | return false; 71 | } 72 | out = overflow; 73 | offset = out.arrayOffset() + out.position(); 74 | } 75 | } 76 | deflater.end(); 77 | return length > outSize; 78 | } 79 | 80 | @Override 81 | public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { 82 | Inflater inflater = new Inflater(true); 83 | inflater.setInput(in.array(), in.arrayOffset() + in.position(), 84 | in.remaining()); 85 | while (!(inflater.finished() || inflater.needsDictionary() || 86 | inflater.needsInput())) { 87 | try { 88 | int count = inflater.inflate(out.array(), 89 | out.arrayOffset() + out.position(), 90 | out.remaining()); 91 | out.position(count + out.position()); 92 | } catch (DataFormatException dfe) { 93 | throw new IOException("Bad compression data", dfe); 94 | } 95 | } 96 | out.flip(); 97 | inflater.end(); 98 | in.position(in.limit()); 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyBinaryTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.EOFException; 24 | import java.io.IOException; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | import org.apache.hadoop.io.BytesWritable; 29 | 30 | import com.facebook.hive.orc.InStream; 31 | import com.facebook.hive.orc.OrcProto; 32 | import com.facebook.hive.orc.RunLengthIntegerReader; 33 | import com.facebook.hive.orc.StreamName; 34 | import com.facebook.hive.orc.WriterImpl; 35 | import com.facebook.hive.orc.OrcProto.RowIndex; 36 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 37 | 38 | public class LazyBinaryTreeReader extends LazyTreeReader { 39 | 40 | private InStream stream; 41 | private RunLengthIntegerReader lengths; 42 | 43 | public LazyBinaryTreeReader(int columnId, long rowIndexStride) { 44 | super(columnId, rowIndexStride); 45 | } 46 | 47 | @Override 48 | public void startStripe(Map streams, List encodings, 49 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 50 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 51 | StreamName name = new StreamName(columnId, 52 | OrcProto.Stream.Kind.DATA); 53 | stream = streams.get(name); 54 | lengths = new RunLengthIntegerReader(streams.get(new 55 | StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), 56 | false, WriterImpl.INT_BYTE_SIZE); 57 | if (indexes[columnId] != null) { 58 | loadIndeces(indexes[columnId].getEntryList(), 0); 59 | } 60 | } 61 | 62 | @Override 63 | public void seek(int index) throws IOException { 64 | stream.seek(index); 65 | 
lengths.seek(index); 66 | } 67 | 68 | @Override 69 | public int loadIndeces(List rowIndexEntries, int startIndex) { 70 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 71 | updatedStartIndex = stream.loadIndeces(rowIndexEntries, updatedStartIndex); 72 | return lengths.loadIndeces(rowIndexEntries, updatedStartIndex); 73 | } 74 | 75 | @Override 76 | public Object next(Object previous) throws IOException { 77 | BytesWritable result = null; 78 | if (valuePresent) { 79 | if (previous == null) { 80 | result = new BytesWritable(); 81 | } else { 82 | result = (BytesWritable) previous; 83 | } 84 | int len = (int) lengths.next(); 85 | result.setSize(len); 86 | int offset = 0; 87 | while (len > 0) { 88 | int written = stream.read(result.getBytes(), offset, len); 89 | if (written < 0) { 90 | throw new EOFException("Can't finish byte read from " + stream); 91 | } 92 | len -= written; 93 | offset += written; 94 | } 95 | } 96 | return result; 97 | } 98 | 99 | @Override 100 | public void skipRows(long numNonNullValues) throws IOException { 101 | long lengthToSkip = 0; 102 | for(int i=0; i < numNonNullValues; ++i) { 103 | lengthToSkip += lengths.next(); 104 | } 105 | stream.skip(lengthToSkip); 106 | } 107 | 108 | @Override 109 | public void close() throws IOException { 110 | super.close(); 111 | if (stream != null) { 112 | stream.close(); 113 | } 114 | if (lengths != null) { 115 | lengths.close(); 116 | } 117 | } 118 | 119 | } 120 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyBooleanTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. 
See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import org.apache.hadoop.io.BooleanWritable; 28 | 29 | import com.facebook.hive.orc.BitFieldReader; 30 | import com.facebook.hive.orc.InStream; 31 | import com.facebook.hive.orc.OrcProto; 32 | import com.facebook.hive.orc.StreamName; 33 | import com.facebook.hive.orc.OrcProto.RowIndex; 34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 35 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 36 | 37 | public class LazyBooleanTreeReader extends LazyTreeReader { 38 | 39 | private BitFieldReader reader = null; 40 | private boolean latestRead = true; //< Latest value from stream. 
41 | 42 | public LazyBooleanTreeReader(int columnId, long rowIndexStride) { 43 | super(columnId, rowIndexStride); 44 | } 45 | 46 | @Override 47 | public void startStripe(Map streams, List encodings, 48 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 49 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 50 | reader = new BitFieldReader(streams.get(new StreamName(columnId, 51 | OrcProto.Stream.Kind.DATA))); 52 | if (indexes[columnId] != null) { 53 | loadIndeces(indexes[columnId].getEntryList(), 0); 54 | } 55 | } 56 | 57 | @Override 58 | public void seek(int index) throws IOException { 59 | reader.seek(index); 60 | } 61 | 62 | @Override 63 | public int loadIndeces(List rowIndexEntries, int startIndex) { 64 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 65 | return reader.loadIndeces(rowIndexEntries, updatedStartIndex); 66 | } 67 | 68 | @Override 69 | public void skipRows(long numNonNullValues) throws IOException { 70 | reader.skip(numNonNullValues); 71 | } 72 | 73 | boolean readBoolean() throws IOException { 74 | latestRead = (reader.next() == 1); 75 | return latestRead; 76 | } 77 | 78 | 79 | BooleanWritable createWritable(Object previous, boolean v) throws IOException { 80 | BooleanWritable result = null; 81 | if (previous == null) { 82 | result = new BooleanWritable(); 83 | } else { 84 | result = (BooleanWritable) previous; 85 | } 86 | result.set(v); 87 | return result; 88 | } 89 | 90 | @Override 91 | public Object createWritableFromLatest(Object previous) throws IOException { 92 | return createWritable(previous, latestRead); 93 | } 94 | 95 | @Override 96 | public boolean nextBoolean(boolean readStream) throws IOException { 97 | if (!readStream) { 98 | return latestRead; 99 | } 100 | if (!valuePresent) { 101 | throw new ValueNotPresentException("Cannot materialize boolean."); 102 | } 103 | return readBoolean(); 104 | 105 | } 106 | 107 | @Override 108 | public Object next(Object previous) throws IOException 
{ 109 | BooleanWritable result = null; 110 | if (valuePresent) { 111 | result = createWritable(previous, readBoolean()); 112 | } 113 | return result; 114 | } 115 | 116 | @Override 117 | public void close() throws IOException { 118 | super.close(); 119 | if (reader != null) { 120 | reader.close(); 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyByteTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import org.apache.hadoop.hive.serde2.io.ByteWritable; 28 | 29 | import com.facebook.hive.orc.InStream; 30 | import com.facebook.hive.orc.OrcProto; 31 | import com.facebook.hive.orc.RunLengthByteReader; 32 | import com.facebook.hive.orc.StreamName; 33 | import com.facebook.hive.orc.OrcProto.RowIndex; 34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 35 | 36 | public class LazyByteTreeReader extends LazyTreeReader { 37 | 38 | private RunLengthByteReader reader = null; 39 | 40 | public LazyByteTreeReader(int columnId, long rowIndexStride) { 41 | super(columnId, rowIndexStride); 42 | } 43 | 44 | @Override 45 | public void startStripe(Map streams, List encodings, 46 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 47 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 48 | reader = new RunLengthByteReader(streams.get(new StreamName(columnId, 49 | OrcProto.Stream.Kind.DATA))); 50 | if (indexes[columnId] != null) { 51 | loadIndeces(indexes[columnId].getEntryList(), 0); 52 | } 53 | } 54 | 55 | @Override 56 | public void seek(int index) throws IOException { 57 | reader.seek(index); 58 | } 59 | 60 | @Override 61 | public int loadIndeces(List rowIndexEntries, int startIndex) { 62 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 63 | return reader.loadIndeces(rowIndexEntries, updatedStartIndex); 64 | } 65 | 66 | @Override 67 | public Object next(Object previous) throws IOException { 68 | ByteWritable result = null; 69 | if (valuePresent) { 70 | if (previous == null) { 71 | result = new ByteWritable(); 72 | } else { 73 | result = (ByteWritable) previous; 74 | } 75 | result.set(reader.next()); 76 | } 77 | return result; 78 | } 79 | 80 | @Override 81 | public void skipRows(long numNonNullValues) throws IOException { 82 | reader.skip(numNonNullValues); 83 | } 84 | 
85 | @Override 86 | public void close() throws IOException { 87 | super.close(); 88 | if (reader != null) { 89 | reader.close(); 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyFloatTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import org.apache.hadoop.io.FloatWritable; 28 | 29 | import com.facebook.hive.orc.InStream; 30 | import com.facebook.hive.orc.OrcProto; 31 | import com.facebook.hive.orc.SerializationUtils; 32 | import com.facebook.hive.orc.StreamName; 33 | import com.facebook.hive.orc.OrcProto.RowIndex; 34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 35 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 36 | 37 | public class LazyFloatTreeReader extends LazyTreeReader { 38 | 39 | private InStream stream; 40 | private float latestRead = 0; //< Last float that was read from stream. 41 | 42 | public LazyFloatTreeReader(int columnId, long rowIndexStride) { 43 | super(columnId, rowIndexStride); 44 | } 45 | 46 | @Override 47 | public void startStripe(Map streams, List encodings, 48 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 49 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 50 | StreamName name = new StreamName(columnId, 51 | OrcProto.Stream.Kind.DATA); 52 | stream = streams.get(name); 53 | if (indexes[columnId] != null) { 54 | loadIndeces(indexes[columnId].getEntryList(), 0); 55 | } 56 | } 57 | 58 | @Override 59 | public void seek(int index) throws IOException { 60 | stream.seek(index); 61 | } 62 | 63 | @Override 64 | public int loadIndeces(List rowIndexEntries, int startIndex) { 65 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 66 | return stream.loadIndeces(rowIndexEntries, updatedStartIndex); 67 | } 68 | 69 | /** 70 | * Read a float value from the stream. 
71 | */ 72 | private float readFloat() throws IOException { 73 | latestRead = SerializationUtils.readFloat(stream); 74 | return latestRead; 75 | } 76 | 77 | 78 | FloatWritable createWritable(Object previous, float value) throws IOException { 79 | FloatWritable result = null; 80 | if (previous == null) { 81 | result = new FloatWritable(); 82 | } else { 83 | result = (FloatWritable) previous; 84 | } 85 | result.set(value); 86 | return result; 87 | } 88 | 89 | @Override 90 | public Object createWritableFromLatest(Object previous) throws IOException { 91 | return createWritable(previous, latestRead); 92 | } 93 | 94 | /** 95 | * Give the next float as a primitive. 96 | */ 97 | @Override 98 | public float nextFloat(boolean readStream) throws IOException, ValueNotPresentException { 99 | if (!readStream) { 100 | return latestRead; 101 | } 102 | if (!valuePresent) { 103 | throw new ValueNotPresentException("Cannot materialize float.."); 104 | } 105 | return readFloat(); 106 | } 107 | 108 | @Override 109 | public Object next(Object previous) throws IOException { 110 | FloatWritable result = null; 111 | if (valuePresent) { 112 | result = createWritable(previous, readFloat()); 113 | } 114 | return result; 115 | } 116 | 117 | @Override 118 | public void skipRows(long numNonNullValues) throws IOException { 119 | for(int i=0; i < numNonNullValues; ++i) { 120 | SerializationUtils.readFloat(stream); 121 | } 122 | } 123 | 124 | @Override 125 | public void close() throws IOException { 126 | super.close(); 127 | if (stream != null) { 128 | stream.close(); 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyIntDictionaryTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 
2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | 25 | import org.apache.hadoop.io.IntWritable; 26 | 27 | import com.facebook.hive.orc.WriterImpl; 28 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 29 | 30 | class LazyIntDictionaryTreeReader extends LazyNumericDictionaryTreeReader { 31 | LazyIntDictionaryTreeReader (int columnId, long rowIndexStride) { 32 | super(columnId, rowIndexStride); 33 | } 34 | 35 | @Override 36 | protected int getNumBytes() { 37 | return WriterImpl.INT_BYTE_SIZE; 38 | } 39 | 40 | private int latestValue; //< Latest key that was read from reader. 41 | 42 | /** 43 | * Read an int value from the stream. 
44 | */ 45 | private int readInt() throws IOException { 46 | return latestValue = (int) readPrimitive(); 47 | } 48 | 49 | private int latestValue() { 50 | return latestValue; 51 | } 52 | 53 | IntWritable createWritable(Object previous, int v) throws IOException { 54 | IntWritable result = null; 55 | if (previous == null) { 56 | result = new IntWritable(); 57 | } else { 58 | result = (IntWritable) previous; 59 | } 60 | result.set(v); 61 | return result; 62 | } 63 | 64 | @Override 65 | public Object createWritableFromLatest(Object previous) throws IOException { 66 | return createWritable(previous, latestValue()); 67 | } 68 | 69 | /** 70 | * Give the next int as a primitive 71 | */ 72 | @Override 73 | public int nextInt(boolean readStream) throws IOException { 74 | if (!readStream) { 75 | return latestValue(); 76 | } 77 | if (!valuePresent) { 78 | throw new ValueNotPresentException("Cannot materialize int."); 79 | } 80 | return readInt(); 81 | } 82 | 83 | 84 | @Override 85 | public Object next(Object previous) throws IOException { 86 | IntWritable result = null; 87 | if (valuePresent) { 88 | result = createWritable(previous, readInt()); 89 | } 90 | return result; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyIntDirectTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | 25 | import org.apache.hadoop.io.IntWritable; 26 | 27 | import com.facebook.hive.orc.SerializationUtils; 28 | import com.facebook.hive.orc.WriterImpl; 29 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 30 | 31 | 32 | class LazyIntDirectTreeReader extends LazyNumericDirectTreeReader { 33 | LazyIntDirectTreeReader(int columnId, long rowIndexStride) { 34 | super(columnId, rowIndexStride); 35 | } 36 | 37 | private int latestRead; //< Latest integer read from the stream. 38 | 39 | /** 40 | * Read an int value from the stream. 
41 | */ 42 | private int readInt() throws IOException { 43 | latestRead = (int)SerializationUtils.readIntegerType(input, WriterImpl.INT_BYTE_SIZE, 44 | true, input.useVInts()); 45 | return latestRead; 46 | } 47 | 48 | 49 | IntWritable createWritable(Object previous, int v) throws IOException { 50 | IntWritable result = null; 51 | if (previous == null) { 52 | result = new IntWritable(); 53 | } else { 54 | result = (IntWritable) previous; 55 | } 56 | result.set(v); 57 | return result; 58 | } 59 | 60 | @Override 61 | public Object createWritableFromLatest(Object previous) throws IOException { 62 | return createWritable(previous, latestRead); 63 | } 64 | 65 | /** 66 | * Give the next int as a primitive 67 | */ 68 | @Override 69 | public int nextInt(boolean readStream) throws IOException { 70 | if (!readStream) 71 | return latestRead; 72 | if (!valuePresent) 73 | throw new ValueNotPresentException("Cannot materialize int."); 74 | return readInt(); 75 | } 76 | 77 | 78 | @Override 79 | public Object next(Object previous) throws IOException { 80 | IntWritable result = null; 81 | if (valuePresent) { 82 | result = createWritable(previous, readInt()); 83 | } 84 | return result; 85 | } 86 | 87 | @Override 88 | public void skipRows(long numNonNullValues) throws IOException { 89 | for (int i = 0; i < numNonNullValues; i++) { 90 | SerializationUtils.readIntegerType(input, WriterImpl.INT_BYTE_SIZE, 91 | true, input.useVInts()); 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyListTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. 
See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | import com.facebook.hive.orc.InStream; 29 | import com.facebook.hive.orc.OrcProto; 30 | import com.facebook.hive.orc.RunLengthIntegerReader; 31 | import com.facebook.hive.orc.StreamName; 32 | import com.facebook.hive.orc.WriterImpl; 33 | import com.facebook.hive.orc.OrcProto.RowIndex; 34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 35 | 36 | public class LazyListTreeReader extends LazyTreeReader { 37 | private final LazyTreeReader elementReader; 38 | private RunLengthIntegerReader lengths; 39 | 40 | public LazyListTreeReader(int columnId, long rowIndexStride, LazyTreeReader elementReader) { 41 | super(columnId, rowIndexStride); 42 | this.elementReader = elementReader; 43 | } 44 | 45 | @Override 46 | public Object next(Object previous) throws IOException { 47 | List result = null; 48 | if (valuePresent) { 49 | final int length = nextLength(); 50 | final List prevResult = previous == null ? 51 | null : (ArrayList) previous; 52 | final int prevLength = prevResult == null ? 
-1 : prevResult.size(); 53 | 54 | // read the new elements into the array 55 | result = new ArrayList(length); 56 | for(int i=0; i < length; i++) { 57 | result.add(elementReader.getInComplexType(i < prevLength ? 58 | prevResult.get(i) : null, previousRow)); 59 | } 60 | } 61 | return result; 62 | } 63 | 64 | @Override 65 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException { 66 | super.seek(rowIndexEntry, backwards); 67 | elementReader.seek(rowIndexEntry, backwards); 68 | } 69 | 70 | @Override 71 | public void startStripe(Map streams, 72 | List encodings, 73 | RowIndex[] indexes, 74 | long rowBaseInStripe 75 | ) throws IOException { 76 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 77 | elementReader.startStripe(streams, encodings, indexes, rowBaseInStripe); 78 | lengths = new RunLengthIntegerReader(streams.get(new StreamName(columnId, 79 | OrcProto.Stream.Kind.LENGTH)), false, WriterImpl.INT_BYTE_SIZE); 80 | if (indexes[columnId] != null) { 81 | loadIndeces(indexes[columnId].getEntryList(), 0); 82 | } 83 | } 84 | 85 | @Override 86 | public void seek(int index) throws IOException { 87 | lengths.seek(index); 88 | } 89 | 90 | @Override 91 | public int loadIndeces(List rowIndexEntries, int startIndex) { 92 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 93 | return lengths.loadIndeces(rowIndexEntries, updatedStartIndex); 94 | } 95 | 96 | public int nextLength() throws IOException { 97 | return (int) lengths.next(); 98 | } 99 | 100 | @Override 101 | public void skipRows(long numNonNullValues) throws IOException { 102 | long childSkip = 0; 103 | for(long i=0; i < numNonNullValues; ++i) { 104 | childSkip += lengths.next(); 105 | } 106 | elementReader.skipRowsInComplexType(childSkip); 107 | } 108 | 109 | @Override 110 | public void close() throws IOException { 111 | super.close(); 112 | if (elementReader != null) { 113 | elementReader.close(); 114 | } 115 | if (lengths != null) { 116 | lengths.close(); 
117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyLongDictionaryTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | 25 | import org.apache.hadoop.io.LongWritable; 26 | 27 | import com.facebook.hive.orc.WriterImpl; 28 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 29 | 30 | class LazyLongDictionaryTreeReader extends LazyNumericDictionaryTreeReader { 31 | 32 | private long latestValue = 0; //< Latest index read from reader 33 | 34 | LazyLongDictionaryTreeReader (int columnId, long rowIndexStride) { 35 | super(columnId, rowIndexStride); 36 | } 37 | 38 | @Override 39 | protected int getNumBytes() { 40 | return WriterImpl.LONG_BYTE_SIZE; 41 | } 42 | 43 | private long readLong() throws IOException { 44 | return latestValue = readPrimitive(); 45 | } 46 | 47 | private long latestValue() { 48 | return latestValue; 49 | } 50 | 51 | private LongWritable createWritable(Object previous, long v) throws IOException { 52 | LongWritable result = null; 53 | if (previous == null) { 54 | result = new LongWritable(); 55 | } else { 56 | result = (LongWritable) previous; 57 | } 58 | result.set(v); 59 | return result; 60 | } 61 | 62 | @Override 63 | public Object createWritableFromLatest(Object previous) throws IOException { 64 | return createWritable(previous, latestValue()); 65 | } 66 | 67 | @Override 68 | public long nextLong(boolean readStream) throws IOException { 69 | if (!readStream) { 70 | return latestValue(); 71 | } 72 | if (!valuePresent) { 73 | throw new ValueNotPresentException("Cannot materialize long."); 74 | } 75 | return readLong(); 76 | } 77 | 78 | @Override 79 | public Object next(Object previous) throws IOException { 80 | LongWritable result = null; 81 | if (valuePresent) { 82 | result = createWritable(previous, readLong()); 83 | } 84 | return result; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyLongDirectTreeReader.java: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | 25 | import org.apache.hadoop.io.LongWritable; 26 | 27 | import com.facebook.hive.orc.SerializationUtils; 28 | import com.facebook.hive.orc.WriterImpl; 29 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 30 | 31 | 32 | class LazyLongDirectTreeReader extends LazyNumericDirectTreeReader { 33 | private long latestRead = 0; 34 | 35 | LazyLongDirectTreeReader(int columnId, long rowIndexStride) { 36 | super(columnId, rowIndexStride); 37 | } 38 | 39 | private long readLong() throws IOException { 40 | latestRead = (long)SerializationUtils.readIntegerType(input, WriterImpl.LONG_BYTE_SIZE, 41 | true, input.useVInts()); 42 | return latestRead; 43 | } 44 | 45 | private LongWritable createWritable(Object previous, long v) throws IOException { 46 | LongWritable result = null; 47 | if (previous == null) { 48 | result = new LongWritable(); 49 | } else { 50 | result = (LongWritable) previous; 51 | } 52 | result.set(v); 53 | return result; 54 | } 55 | 56 | @Override 57 | public Object createWritableFromLatest(Object previous) throws IOException { 58 | return createWritable(previous, latestRead); 59 | } 60 | 61 | @Override 62 | public long nextLong(boolean readStream) throws IOException { 63 | if (!readStream) 64 | return latestRead; 65 | if (!valuePresent) 66 | throw new ValueNotPresentException("Cannot materialize long."); 67 | return readLong(); 68 | } 69 | 70 | @Override 71 | public Object next(Object previous) throws IOException { 72 | LongWritable result = null; 73 | if (valuePresent) { 74 | result = createWritable(previous, readLong()); 75 | } 76 | return result; 77 | } 78 | 79 | @Override 80 | public void skipRows(long numNonNullValues) throws IOException { 81 | for (int i = 0; i < numNonNullValues; i++) { 82 | SerializationUtils.readIntegerType(input, WriterImpl.LONG_BYTE_SIZE, 83 | true, input.useVInts()); 84 | } 85 | } 86 | } 87 | 
-------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyLongTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import com.facebook.hive.orc.InStream; 28 | import com.facebook.hive.orc.OrcProto; 29 | import com.facebook.hive.orc.StreamName; 30 | import com.facebook.hive.orc.OrcProto.RowIndex; 31 | 32 | public class LazyLongTreeReader extends LazyIntTreeReader { 33 | 34 | public LazyLongTreeReader(int columnId, long rowIndexStride) { 35 | super(columnId, rowIndexStride); 36 | } 37 | 38 | @Override 39 | public void startStripe(Map streams, List encodings, 40 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 41 | switch (encodings.get(columnId).getKind()) { 42 | case DICTIONARY: 43 | reader = new LazyLongDictionaryTreeReader(columnId, rowIndexStride); 44 | break; 45 | case DIRECT: 46 | reader = new LazyLongDirectTreeReader(columnId, rowIndexStride); 47 | break; 48 | default: 49 | throw new IllegalArgumentException("Unsupported encoding " + 50 | encodings.get(columnId).getKind()); 51 | } 52 | reader.startStripe(streams, encodings, indexes, rowBaseInStripe); 53 | if (indexes[columnId] != null) { 54 | loadIndeces(indexes[columnId].getEntryList(), 0); 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyNumericDirectTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import com.facebook.hive.orc.InStream; 28 | import com.facebook.hive.orc.OrcProto; 29 | import com.facebook.hive.orc.StreamName; 30 | import com.facebook.hive.orc.OrcProto.RowIndex; 31 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 32 | 33 | abstract class LazyNumericDirectTreeReader extends LazyTreeReader { 34 | protected InStream input; 35 | 36 | LazyNumericDirectTreeReader(int columnId, long rowIndexStride) { 37 | super(columnId, rowIndexStride); 38 | } 39 | 40 | @Override 41 | public void startStripe(Map streams, List encodings, 42 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 43 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 44 | StreamName name = new StreamName(columnId, 45 | OrcProto.Stream.Kind.DATA); 46 | input = streams.get(name); 47 | if (indexes[columnId] != null) { 48 | loadIndeces(indexes[columnId].getEntryList(), 0); 49 | } 50 | } 51 | 52 | @Override 53 | public void seek(int index) throws IOException { 54 | input.seek(index); 55 | } 56 | 57 | @Override 58 | public int loadIndeces(List rowIndexEntries, int startIndex) { 59 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 60 | return input.loadIndeces(rowIndexEntries, updatedStartIndex); 61 | } 62 | 63 | @Override 64 | public void close() throws IOException { 65 | super.close(); 66 | if (input != null) { 67 | input.close(); 68 | } 69 | } 70 
| } 71 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyShortDictionaryTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | 25 | import org.apache.hadoop.hive.serde2.io.ShortWritable; 26 | 27 | import com.facebook.hive.orc.WriterImpl; 28 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 29 | 30 | class LazyShortDictionaryTreeReader extends LazyNumericDictionaryTreeReader { 31 | LazyShortDictionaryTreeReader (int columnId, long rowIndexStride) { 32 | super(columnId, rowIndexStride); 33 | } 34 | 35 | @Override 36 | protected int getNumBytes() { 37 | return WriterImpl.SHORT_BYTE_SIZE; 38 | } 39 | 40 | private short latestValue; //< Latest key that was read from reader. 41 | 42 | /** 43 | * Read an short value from the stream. 
44 | */ 45 | private short readShort() throws IOException { 46 | return latestValue = (short) readPrimitive(); 47 | } 48 | 49 | private short latestValue() { 50 | return latestValue; 51 | } 52 | 53 | ShortWritable createWritable(Object previous, short v) throws IOException { 54 | ShortWritable result = null; 55 | if (previous == null) { 56 | result = new ShortWritable(); 57 | } else { 58 | result = (ShortWritable) previous; 59 | } 60 | result.set(v); 61 | return result; 62 | } 63 | 64 | @Override 65 | public Object createWritableFromLatest(Object previous) throws IOException { 66 | return createWritable(previous, latestValue()); 67 | } 68 | 69 | /** 70 | * Give the next short as a primitive 71 | */ 72 | @Override 73 | public short nextShort(boolean readStream) throws IOException { 74 | if (!readStream) { 75 | return latestValue(); 76 | } 77 | if (!valuePresent) { 78 | throw new ValueNotPresentException("Cannot materialize short."); 79 | } 80 | return readShort(); 81 | } 82 | 83 | @Override 84 | public Object next(Object previous) throws IOException { 85 | ShortWritable result = null; 86 | if (valuePresent) { 87 | result = createWritable(previous, readShort()); 88 | } 89 | return result; 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyShortDirectTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | 25 | import org.apache.hadoop.hive.serde2.io.ShortWritable; 26 | 27 | import com.facebook.hive.orc.SerializationUtils; 28 | import com.facebook.hive.orc.WriterImpl; 29 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException; 30 | 31 | class LazyShortDirectTreeReader extends LazyNumericDirectTreeReader { 32 | LazyShortDirectTreeReader(int columnId, long rowIndexStride) { 33 | super(columnId, rowIndexStride); 34 | } 35 | 36 | private short latestRead = 0; //< Latest integer read from the stream. 37 | 38 | /** 39 | * Read a short value from the stream. 
40 | */ 41 | private short readShort() throws IOException { 42 | latestRead = 43 | (short)SerializationUtils.readIntegerType(input, WriterImpl.SHORT_BYTE_SIZE, 44 | true, input.useVInts()); 45 | return latestRead; 46 | } 47 | 48 | 49 | ShortWritable createWritable(Object previous, short v) throws IOException { 50 | ShortWritable result = null; 51 | if (previous == null) { 52 | result = new ShortWritable(); 53 | } else { 54 | result = (ShortWritable) previous; 55 | } 56 | result.set(v); 57 | return result; 58 | } 59 | 60 | @Override 61 | public Object createWritableFromLatest(Object previous) throws IOException { 62 | return createWritable(previous, latestRead); 63 | } 64 | 65 | /** 66 | * Give the next short as a primitive 67 | */ 68 | @Override 69 | public short nextShort(boolean readStream) throws IOException { 70 | if (!readStream) 71 | return latestRead; 72 | if (!valuePresent) 73 | throw new ValueNotPresentException("Cannot materialize short."); 74 | return readShort(); 75 | } 76 | 77 | @Override 78 | public Object next(Object previous) throws IOException { 79 | ShortWritable result = null; 80 | if (valuePresent) { 81 | result = createWritable(previous, readShort()); 82 | } 83 | return result; 84 | } 85 | 86 | @Override 87 | public void skipRows(long numNonNullValues) throws IOException { 88 | for (int i = 0; i < numNonNullValues; i++) { 89 | SerializationUtils.readIntegerType(input, WriterImpl.SHORT_BYTE_SIZE, 90 | true, input.useVInts()); 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyShortTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. 
See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import com.facebook.hive.orc.InStream; 28 | import com.facebook.hive.orc.OrcProto; 29 | import com.facebook.hive.orc.StreamName; 30 | import com.facebook.hive.orc.OrcProto.RowIndex; 31 | 32 | public class LazyShortTreeReader extends LazyIntTreeReader { 33 | 34 | public LazyShortTreeReader(int columnId, long rowIndexStride) { 35 | super(columnId, rowIndexStride); 36 | } 37 | 38 | @Override 39 | public void startStripe(Map streams, List encodings, 40 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 41 | switch (encodings.get(columnId).getKind()) { 42 | case DICTIONARY: 43 | reader = new LazyShortDictionaryTreeReader(columnId, rowIndexStride); 44 | break; 45 | case DIRECT: 46 | reader = new LazyShortDirectTreeReader(columnId, rowIndexStride); 47 | break; 48 | default: 49 | throw new IllegalArgumentException("Unsupported encoding " + 50 | encodings.get(columnId).getKind()); 51 | } 52 | reader.startStripe(streams, encodings, indexes, rowBaseInStripe); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- 
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyStringDirectTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.EOFException; 24 | import java.io.IOException; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | import org.apache.hadoop.io.Text; 29 | 30 | import com.facebook.hive.orc.InStream; 31 | import com.facebook.hive.orc.OrcProto; 32 | import com.facebook.hive.orc.RunLengthIntegerReader; 33 | import com.facebook.hive.orc.StreamName; 34 | import com.facebook.hive.orc.WriterImpl; 35 | import com.facebook.hive.orc.OrcProto.RowIndex; 36 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 37 | 38 | class LazyStringDirectTreeReader extends LazyTreeReader { 39 | private InStream stream; 40 | private RunLengthIntegerReader lengths; 41 | 42 | LazyStringDirectTreeReader(int columnId, long rowIndexStride) { 43 | super(columnId, rowIndexStride); 44 | } 45 | 46 | @Override 47 | public void startStripe(Map streams, List encodings, 48 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 49 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 50 | StreamName name = new StreamName(columnId, 51 | OrcProto.Stream.Kind.DATA); 52 | stream = streams.get(name); 53 | lengths = new RunLengthIntegerReader(streams.get(new 54 | StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), 55 | false, WriterImpl.INT_BYTE_SIZE); 56 | if (indexes[columnId] != null) { 57 | loadIndeces(indexes[columnId].getEntryList(), 0); 58 | } 59 | } 60 | 61 | @Override 62 | public void seek(int index) throws IOException { 63 | stream.seek(index); 64 | lengths.seek(index); 65 | } 66 | 67 | @Override 68 | public int loadIndeces(List rowIndexEntries, int startIndex) { 69 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 70 | updatedStartIndex = stream.loadIndeces(rowIndexEntries, updatedStartIndex); 71 | return lengths.loadIndeces(rowIndexEntries, updatedStartIndex); 72 | } 73 | 74 | @Override 75 | public Object next(Object previous) throws IOException { 76 | Text result = null; 77 | 
if (valuePresent) { 78 | if (previous == null) { 79 | result = new Text(); 80 | } else { 81 | result = (Text) previous; 82 | } 83 | int len = (int) lengths.next(); 84 | int offset = 0; 85 | byte[] bytes = new byte[len]; 86 | while (len > 0) { 87 | int written = stream.read(bytes, offset, len); 88 | if (written < 0) { 89 | throw new EOFException("Can't finish byte read from " + stream); 90 | } 91 | len -= written; 92 | offset += written; 93 | } 94 | result.set(bytes); 95 | } 96 | return result; 97 | } 98 | 99 | @Override 100 | public void skipRows(long numNonNullValues) throws IOException { 101 | long lengthToSkip = 0; 102 | for(int i=0; i < numNonNullValues; ++i) { 103 | lengthToSkip += lengths.next(); 104 | } 105 | stream.skip(lengthToSkip); 106 | } 107 | 108 | @Override 109 | public void close() throws IOException { 110 | super.close(); 111 | if (stream != null) { 112 | stream.close(); 113 | } 114 | if (lengths != null) { 115 | lengths.close(); 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyStringTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import com.facebook.hive.orc.InStream; 28 | import com.facebook.hive.orc.OrcProto; 29 | import com.facebook.hive.orc.StreamName; 30 | import com.facebook.hive.orc.OrcProto.RowIndex; 31 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 32 | 33 | public class LazyStringTreeReader extends LazyTreeReader { 34 | 35 | private LazyTreeReader reader; 36 | 37 | public LazyStringTreeReader(int columnId, long rowIndexStride) { 38 | super(columnId, rowIndexStride); 39 | } 40 | 41 | @Override 42 | public void startStripe(Map streams, List encodings, 43 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 44 | // For each stripe, checks the encoding and initializes the appropriate reader 45 | switch (encodings.get(columnId).getKind()) { 46 | case DIRECT: 47 | reader = new LazyStringDirectTreeReader(columnId, rowIndexStride); 48 | break; 49 | case DICTIONARY: 50 | reader = new LazyStringDictionaryTreeReader(columnId, rowIndexStride); 51 | break; 52 | default: 53 | throw new IllegalArgumentException("Unsupported encoding " + 54 | encodings.get(columnId).getKind()); 55 | } 56 | 57 | reader.startStripe(streams, encodings, indexes, rowBaseInStripe); 58 | } 59 | 60 | @Override 61 | public void seek(int index) throws IOException { 62 | reader.seek(index); 63 | } 64 | 65 | @Override 66 | public int loadIndeces(List rowIndexEntries, int startIndex) { 67 | int updatedStartIndex = 
super.loadIndeces(rowIndexEntries, startIndex); 68 | return reader.loadIndeces(rowIndexEntries, updatedStartIndex); 69 | } 70 | 71 | @Override 72 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException { 73 | reader.seek(rowIndexEntry, backwards); 74 | } 75 | 76 | @Override 77 | public Object next(Object previous) throws IOException { 78 | return reader.next(previous); 79 | } 80 | 81 | @Override 82 | public void skipRowsInComplexType(long numRows) throws IOException { 83 | reader.skipRowsInComplexType(numRows); 84 | } 85 | 86 | @Override 87 | public void skipRows(long numNonNullValues) throws IOException { 88 | reader.skipRows(numNonNullValues); 89 | } 90 | 91 | @Override 92 | public boolean nextIsNull(long currentRow) throws IOException { 93 | return reader.nextIsNull(currentRow); 94 | } 95 | 96 | @Override 97 | public boolean nextIsNullInComplexType() throws IOException { 98 | return reader.nextIsNullInComplexType(); 99 | } 100 | 101 | @Override 102 | public Object getInComplexType(Object previous, long row) throws IOException { 103 | return reader.getInComplexType(previous, row); 104 | } 105 | 106 | @Override 107 | public Object get(long currentRow, Object previous) throws IOException { 108 | return reader.get(currentRow, previous); 109 | } 110 | 111 | @Override 112 | public void close() throws IOException { 113 | super.close(); 114 | if (reader != null) { 115 | reader.close(); 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyStructTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. 
See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import com.facebook.hive.orc.InStream; 28 | import com.facebook.hive.orc.OrcProto; 29 | import com.facebook.hive.orc.OrcStruct; 30 | import com.facebook.hive.orc.StreamName; 31 | import com.facebook.hive.orc.OrcProto.RowIndex; 32 | 33 | public class LazyStructTreeReader extends LazyTreeReader { 34 | 35 | private final LazyTreeReader[] fields; 36 | private final List fieldNames; 37 | 38 | public LazyStructTreeReader(int columnId, long rowIndexStride, LazyTreeReader[] fields, 39 | List fieldNames) throws IOException { 40 | super(columnId, rowIndexStride); 41 | this.fields = fields; 42 | this.fieldNames = fieldNames; 43 | } 44 | 45 | @Override 46 | public void skipRows(long numNonNullValues) throws IOException { 47 | for(LazyTreeReader field: fields) { 48 | if (field != null) { 49 | field.skipRowsInComplexType(numNonNullValues); 50 | } 51 | } 52 | } 53 | 54 | @Override 55 | public Object next(Object previous) throws IOException { 56 | OrcStruct result = null; 57 | if (valuePresent) { 58 | if (previous == null) { 59 | result = new OrcStruct(fieldNames); 60 | } else { 61 | result = (OrcStruct) 
previous; 62 | 63 | // If the input format was initialized with a file with a 64 | // different number of fields, the number of fields needs to 65 | // be updated to the correct number 66 | result.setFieldNames(fieldNames); 67 | } 68 | for(int i=0; i < fields.length; ++i) { 69 | if (fields[i] != null) { 70 | result.setFieldValue(i, 71 | fields[i].getInComplexType(result.getFieldValue(i), previousRow)); 72 | } 73 | } 74 | } 75 | return result; 76 | } 77 | 78 | @Override 79 | public void startStripe(Map streams, 80 | List encodings, 81 | RowIndex[] indexes, 82 | long rowBaseInStripe 83 | ) throws IOException { 84 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 85 | for (int i = 0; i < fields.length; i++) { 86 | if (fields[i] != null) { 87 | fields[i].startStripe(streams, encodings, indexes, rowBaseInStripe); 88 | } 89 | } 90 | if (indexes[columnId] != null) { 91 | loadIndeces(indexes[columnId].getEntryList(), 0); 92 | } 93 | } 94 | 95 | @Override 96 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException { 97 | super.seek(rowIndexEntry, backwards); 98 | for (LazyTreeReader field : fields) { 99 | if (field != null) { 100 | field.seek(rowIndexEntry, backwards); 101 | } 102 | } 103 | } 104 | 105 | @Override 106 | protected void seek(int index) throws IOException { 107 | // Most tree readers have streams besides the present stream, e.g. the data for a simple type 108 | // or the length of a complex type. The only data structs contain besides whether or not 109 | // they're null is the fields themselves, each of which has its own tree reader, so nothing 110 | // to do here. 
111 | } 112 | 113 | @Override 114 | public void close() throws IOException { 115 | super.close(); 116 | for (LazyTreeReader field : fields) { 117 | if (field != null) { 118 | field.close(); 119 | } 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyUnionTreeReader.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import com.facebook.hive.orc.InStream; 28 | import com.facebook.hive.orc.OrcProto; 29 | import com.facebook.hive.orc.OrcUnion; 30 | import com.facebook.hive.orc.RunLengthByteReader; 31 | import com.facebook.hive.orc.StreamName; 32 | import com.facebook.hive.orc.OrcProto.RowIndex; 33 | import com.facebook.hive.orc.OrcProto.RowIndexEntry; 34 | 35 | public class LazyUnionTreeReader extends LazyTreeReader { 36 | 37 | private final LazyTreeReader[] fields; 38 | private RunLengthByteReader tags; 39 | 40 | public LazyUnionTreeReader(int columnId, long rowIndexStride, LazyTreeReader[] fields) { 41 | super(columnId, rowIndexStride); 42 | this.fields = fields; 43 | } 44 | 45 | @Override 46 | public Object next(Object previous) throws IOException { 47 | OrcUnion result = null; 48 | if (valuePresent) { 49 | if (previous == null) { 50 | result = new OrcUnion(); 51 | } else { 52 | result = (OrcUnion) previous; 53 | } 54 | byte tag = nextTag(); 55 | Object previousVal = result.getObject(); 56 | result.set(tag, fields[tag].getInComplexType(tag == result.getTag() ? 
57 | previousVal : null, previousRow)); 58 | } 59 | return result; 60 | } 61 | 62 | @Override 63 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException { 64 | super.seek(rowIndexEntry, backwards); 65 | for (LazyTreeReader field : fields) { 66 | field.seek(rowIndexEntry, backwards); 67 | } 68 | } 69 | 70 | @Override 71 | public void startStripe(Map streams, List encodings, 72 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 73 | super.startStripe(streams, encodings, indexes, rowBaseInStripe); 74 | for (int i = 0; i < fields.length; i++) { 75 | fields[i].startStripe(streams, encodings, indexes, rowBaseInStripe); 76 | } 77 | tags = new RunLengthByteReader(streams.get(new StreamName(columnId, 78 | OrcProto.Stream.Kind.DATA))); 79 | if (indexes[columnId] != null) { 80 | loadIndeces(indexes[columnId].getEntryList(), 0); 81 | } 82 | } 83 | 84 | @Override 85 | public void seek(int index) throws IOException { 86 | tags.seek(index); 87 | } 88 | 89 | @Override 90 | public int loadIndeces(List rowIndexEntries, int startIndex) { 91 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex); 92 | return tags.loadIndeces(rowIndexEntries, updatedStartIndex); 93 | } 94 | 95 | public byte nextTag() throws IOException { 96 | return (byte) tags.next(); 97 | } 98 | 99 | @Override 100 | public void skipRows(long numNonNullValues) throws IOException { 101 | long[] counts = new long[fields.length]; 102 | for(int i=0; i < numNonNullValues; ++i) { 103 | counts[tags.next()] += 1; 104 | } 105 | for(int i=0; i < counts.length; ++i) { 106 | fields[i].skipRowsInComplexType(counts[i]); 107 | } 108 | } 109 | 110 | @Override 111 | public void close() throws IOException { 112 | super.close(); 113 | for (LazyTreeReader field : fields) { 114 | field.close(); 115 | } 116 | if (tags != null) { 117 | tags.close(); 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- 
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBinary.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.io.BytesWritable; 24 | 25 | public class OrcLazyBinary extends OrcLazyObject { 26 | 27 | public OrcLazyBinary(LazyBinaryTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyBinary(OrcLazyBinary copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | BytesWritable copyPrevious = (BytesWritable) copy.previous; 35 | byte[] bytes = new byte[copyPrevious.getLength()]; 36 | System.arraycopy(copyPrevious.getBytes(), 0, bytes, 0, copyPrevious.getLength()); 37 | previous = new BytesWritable(bytes); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBinaryObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.lazy.LazyUtils; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; 25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 28 | import org.apache.hadoop.io.BytesWritable; 29 | 30 | public class OrcLazyBinaryObjectInspector extends OrcLazyPrimitiveObjectInspector implements 31 | BinaryObjectInspector { 32 | 33 | protected OrcLazyBinaryObjectInspector() { 34 | super(PrimitiveObjectInspectorUtils.binaryTypeEntry); 35 | } 36 | 37 | @Override 38 | public byte[] getPrimitiveJavaObject(Object o) { 39 | BytesWritable writable = getPrimitiveWritableObject(o); 40 | return writable == null ? null : LazyUtils.createByteArray(writable); 41 | } 42 | 43 | @Override 44 | public Object copyObject(Object o) { 45 | return o == null ? null : new OrcLazyBinary((OrcLazyBinary) o); 46 | } 47 | 48 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 49 | public PrimitiveTypeInfo getTypeInfo() { 50 | return (PrimitiveTypeInfo) TypeInfoFactory.binaryTypeInfo; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBoolean.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. 
The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.io.BooleanWritable; 24 | 25 | public class OrcLazyBoolean extends OrcLazyObject { 26 | public OrcLazyBoolean(LazyBooleanTreeReader treeReader) { 27 | super(treeReader); 28 | } 29 | 30 | public OrcLazyBoolean(OrcLazyBoolean copy) { 31 | super(copy); 32 | if (copy.previous != null) { 33 | previous = new BooleanWritable(((BooleanWritable)copy.previous).get()); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBooleanObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 27 | import org.apache.hadoop.io.BooleanWritable; 28 | 29 | public class OrcLazyBooleanObjectInspector extends OrcLazyPrimitiveObjectInspector implements 30 | BooleanObjectInspector { 31 | 32 | protected OrcLazyBooleanObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.booleanTypeEntry); 34 | } 35 | 36 | @Override 37 | public boolean get(Object o) { 38 | return ((BooleanWritable)getPrimitiveWritableObject(o)).get(); 39 | } 40 | 41 | @Override 42 | public Object copyObject(Object o) { 43 | return o == null ? null : new OrcLazyBoolean((OrcLazyBoolean) o); 44 | } 45 | 46 | @Override 47 | public Object getPrimitiveJavaObject(Object o) { 48 | BooleanWritable writable = getPrimitiveWritableObject(o); 49 | return writable == null ? 
null : Boolean.valueOf(writable.get()); 50 | } 51 | 52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 53 | public PrimitiveTypeInfo getTypeInfo() { 54 | return (PrimitiveTypeInfo) TypeInfoFactory.booleanTypeInfo; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyByte.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.io.ByteWritable; 24 | 25 | public class OrcLazyByte extends OrcLazyObject { 26 | public OrcLazyByte(LazyByteTreeReader treeReader) { 27 | super(treeReader); 28 | } 29 | 30 | public OrcLazyByte(OrcLazyByte copy) { 31 | super(copy); 32 | if (copy.previous != null) { 33 | previous = new ByteWritable(((ByteWritable)copy.previous).get()); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyByteObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.io.ByteWritable; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; 25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 28 | 29 | public class OrcLazyByteObjectInspector extends OrcLazyPrimitiveObjectInspector implements 30 | ByteObjectInspector { 31 | 32 | protected OrcLazyByteObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.byteTypeEntry); 34 | } 35 | 36 | @Override 37 | public byte get(Object o) { 38 | return ((ByteWritable)getPrimitiveWritableObject(o)).get(); 39 | } 40 | 41 | @Override 42 | public Object copyObject(Object o) { 43 | return o == null ? null : new OrcLazyByte((OrcLazyByte) o); 44 | } 45 | 46 | @Override 47 | public Object getPrimitiveJavaObject(Object o) { 48 | ByteWritable writable = getPrimitiveWritableObject(o); 49 | return writable == null ? null : Byte.valueOf(writable.get()); 50 | } 51 | 52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 53 | public PrimitiveTypeInfo getTypeInfo() { 54 | return (PrimitiveTypeInfo) TypeInfoFactory.byteTypeInfo; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyDouble.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. 
The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.io.DoubleWritable; 24 | 25 | public class OrcLazyDouble extends OrcLazyObject { 26 | 27 | public OrcLazyDouble(LazyDoubleTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyDouble(OrcLazyDouble copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | previous = new DoubleWritable(((DoubleWritable)copy.previous).get()); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyDoubleObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.io.DoubleWritable; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; 25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 28 | 29 | public class OrcLazyDoubleObjectInspector extends OrcLazyPrimitiveObjectInspector 30 | implements DoubleObjectInspector { 31 | 32 | protected OrcLazyDoubleObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.doubleTypeEntry); 34 | } 35 | 36 | public double get(Object o) { 37 | return ((DoubleWritable)getPrimitiveWritableObject(o)).get(); 38 | } 39 | 40 | @Override 41 | public Object copyObject(Object o) { 42 | return o == null ? null : new OrcLazyDouble((OrcLazyDouble) o); 43 | } 44 | 45 | @Override 46 | public Object getPrimitiveJavaObject(Object o) { 47 | DoubleWritable writable = getPrimitiveWritableObject(o); 48 | return writable == null ? 
null : Double.valueOf(writable.get()); 49 | } 50 | 51 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 52 | public PrimitiveTypeInfo getTypeInfo() { 53 | return (PrimitiveTypeInfo) TypeInfoFactory.doubleTypeInfo; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyFloat.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.io.FloatWritable; 24 | 25 | public class OrcLazyFloat extends OrcLazyObject { 26 | 27 | public OrcLazyFloat(LazyFloatTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyFloat(OrcLazyFloat copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | previous = new FloatWritable(((FloatWritable)copy.previous).get()); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyFloatObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 27 | import org.apache.hadoop.io.FloatWritable; 28 | 29 | public class OrcLazyFloatObjectInspector extends OrcLazyPrimitiveObjectInspector implements 30 | FloatObjectInspector { 31 | 32 | OrcLazyFloatObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.floatTypeEntry); 34 | } 35 | 36 | public float get(Object o) { 37 | return ((FloatWritable)getPrimitiveWritableObject(o)).get(); 38 | } 39 | 40 | @Override 41 | public Object copyObject(Object o) { 42 | return o == null ? null : new OrcLazyFloat((OrcLazyFloat) o); 43 | } 44 | 45 | @Override 46 | public Object getPrimitiveJavaObject(Object o) { 47 | FloatWritable writable = getPrimitiveWritableObject(o); 48 | return writable == null ? null : Float.valueOf(writable.get()); 49 | } 50 | 51 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 52 | public PrimitiveTypeInfo getTypeInfo() { 53 | return (PrimitiveTypeInfo) TypeInfoFactory.floatTypeInfo; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyInt.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. 
The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.io.IntWritable; 24 | 25 | public class OrcLazyInt extends OrcLazyObject { 26 | 27 | public OrcLazyInt(LazyIntTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyInt(OrcLazyInt copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | previous = new IntWritable(((IntWritable)copy.previous).get()); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyIntObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 27 | import org.apache.hadoop.io.IntWritable; 28 | 29 | public class OrcLazyIntObjectInspector extends OrcLazyPrimitiveObjectInspector implements 30 | IntObjectInspector { 31 | 32 | protected OrcLazyIntObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.intTypeEntry); 34 | } 35 | 36 | @Override 37 | public int get(Object o) { 38 | return ((IntWritable)getPrimitiveWritableObject(o)).get(); 39 | } 40 | 41 | @Override 42 | public Object copyObject(Object o) { 43 | return o == null ? null : new OrcLazyInt((OrcLazyInt) o); 44 | } 45 | 46 | @Override 47 | public Object getPrimitiveJavaObject(Object o) { 48 | IntWritable writable = (IntWritable) getPrimitiveWritableObject(o); 49 | return writable == null ? 
null : Integer.valueOf(writable.get()); 50 | } 51 | 52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 53 | public PrimitiveTypeInfo getTypeInfo() { 54 | return (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyList.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | public class OrcLazyList extends OrcLazyObject { 24 | 25 | public OrcLazyList(LazyListTreeReader treeReader) { 26 | super(treeReader); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyListObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 
2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | 26 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; 29 | 30 | import com.facebook.hive.orc.OrcProto; 31 | 32 | public class OrcLazyListObjectInspector implements ListObjectInspector { 33 | 34 | private final ObjectInspector child; 35 | 36 | public OrcLazyListObjectInspector(ListTypeInfo info) { 37 | child = OrcLazyObjectInspectorUtils.createWritableObjectInspector(info.getListElementTypeInfo()); 38 | } 39 | 40 | public OrcLazyListObjectInspector(int columnId, List types) { 41 | OrcProto.Type type = types.get(columnId); 42 | child = OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(0), types); 43 | } 44 | 45 | @Override 46 | public List getList(Object data) { 47 | if (data == null) { 48 | return null; 49 | } 50 | 51 | try { 52 | return (List) ((OrcLazyList) data).materialize(); 53 | } catch (IOException e) { 54 | 
throw new RuntimeException(e); 55 | } 56 | } 57 | 58 | @Override 59 | public Object getListElement(Object data, int index) { 60 | List list = getList(data); 61 | 62 | if (list == null || index < 0 || index >= list.size()) { 63 | return null; 64 | } 65 | 66 | return list.get(index); 67 | } 68 | 69 | @Override 70 | public ObjectInspector getListElementObjectInspector() { 71 | return child; 72 | } 73 | 74 | @Override 75 | public int getListLength(Object data) { 76 | if (data == null) { 77 | return -1; 78 | } 79 | 80 | return getList(data).size(); 81 | } 82 | 83 | @Override 84 | public Category getCategory() { 85 | return Category.LIST; 86 | } 87 | 88 | @Override 89 | public String getTypeName() { 90 | return "array<" + child.getTypeName() + ">"; 91 | } 92 | 93 | @Override 94 | public boolean equals(Object o) { 95 | if (o == null || o.getClass() != getClass()) { 96 | return false; 97 | } else if (o == this) { 98 | return true; 99 | } else { 100 | ObjectInspector other = ((OrcLazyListObjectInspector) o).child; 101 | return other.equals(child); 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyLong.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.io.LongWritable; 24 | 25 | public class OrcLazyLong extends OrcLazyObject { 26 | 27 | public OrcLazyLong(LazyLongTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyLong(OrcLazyLong copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | previous = new LongWritable(((LongWritable)copy.previous).get()); 35 | } 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyLongObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 27 | import org.apache.hadoop.io.LongWritable; 28 | 29 | public class OrcLazyLongObjectInspector extends OrcLazyPrimitiveObjectInspector 30 | implements LongObjectInspector { 31 | 32 | protected OrcLazyLongObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.longTypeEntry); 34 | } 35 | 36 | @Override 37 | public Object copyObject(Object o) { 38 | return o == null ? null : new OrcLazyLong((OrcLazyLong) o); 39 | } 40 | 41 | @Override 42 | public Object getPrimitiveJavaObject(Object o) { 43 | LongWritable writable = (LongWritable) getPrimitiveWritableObject(o); 44 | return writable == null ? null : Long.valueOf(writable.get()); 45 | } 46 | 47 | @Override 48 | public long get(Object o) { 49 | return getPrimitiveWritableObject(o).get(); 50 | } 51 | 52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 53 | public PrimitiveTypeInfo getTypeInfo() { 54 | return (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyMap.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. 
See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | public class OrcLazyMap extends OrcLazyObject { 24 | 25 | public OrcLazyMap(LazyMapTreeReader treeReader) { 26 | super(treeReader); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyMapObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 29 | import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; 30 | 31 | import com.facebook.hive.orc.OrcProto; 32 | 33 | public class OrcLazyMapObjectInspector implements MapObjectInspector { 34 | 35 | private final ObjectInspector key; 36 | private final ObjectInspector value; 37 | 38 | public OrcLazyMapObjectInspector(MapTypeInfo info) { 39 | key = OrcLazyObjectInspectorUtils.createWritableObjectInspector(info.getMapKeyTypeInfo()); 40 | value = OrcLazyObjectInspectorUtils.createWritableObjectInspector(info.getMapValueTypeInfo()); 41 | } 42 | 43 | public OrcLazyMapObjectInspector(int columnId, List types) { 44 | OrcProto.Type type = types.get(columnId); 45 | key = OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(0), types); 46 | value = OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(1), types); 47 | } 48 | 49 | @Override 50 | public Map getMap(Object data) { 51 | if (data == null) { 52 | return null; 53 | } 54 | 55 | try { 56 | return (Map) ((OrcLazyMap) data).materialize(); 57 | } catch (IOException e) { 58 | throw new RuntimeException(e); 59 | } 60 | } 61 | 62 | @Override 63 | public ObjectInspector getMapKeyObjectInspector() { 64 | return key; 65 | } 66 | 67 | @Override 
68 | public int getMapSize(Object data) { 69 | if (data == null) { 70 | return -1; 71 | } 72 | 73 | return getMap(data).size(); 74 | } 75 | 76 | @Override 77 | public Object getMapValueElement(Object data, Object key) { 78 | if (data == null) { 79 | return null; 80 | } 81 | 82 | return getMap(data).get(key); 83 | } 84 | 85 | @Override 86 | public ObjectInspector getMapValueObjectInspector() { 87 | return value; 88 | } 89 | 90 | @Override 91 | public Category getCategory() { 92 | return Category.MAP; 93 | } 94 | 95 | @Override 96 | public String getTypeName() { 97 | return "map<" + key.getTypeName() + "," + value.getTypeName() + ">"; 98 | } 99 | 100 | @Override 101 | public boolean equals(Object o) { 102 | if (o == null || o.getClass() != getClass()) { 103 | return false; 104 | } else if (o == this) { 105 | return true; 106 | } else { 107 | OrcLazyMapObjectInspector other = (OrcLazyMapObjectInspector) o; 108 | return other.key.equals(key) && other.value.equals(value); 109 | } 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyPrimitiveObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | 25 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 26 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; 27 | import org.apache.hadoop.io.Writable; 28 | 29 | public abstract class OrcLazyPrimitiveObjectInspector implements PrimitiveObjectInspector { 30 | 31 | protected final transient PrimitiveTypeEntry typeEntry; 32 | 33 | protected OrcLazyPrimitiveObjectInspector(PrimitiveTypeEntry typeEntry) { 34 | this.typeEntry = typeEntry; 35 | } 36 | 37 | @SuppressWarnings("unchecked") 38 | @Override 39 | public U getPrimitiveWritableObject(Object o) { 40 | try { 41 | return o == null ? null : (U)((T)o).materialize(); 42 | } catch (IOException e) { 43 | throw new RuntimeException(e); 44 | } 45 | } 46 | 47 | @Override 48 | public boolean preferWritable() { 49 | return true; 50 | } 51 | 52 | /** 53 | * Return the associated Java primitive class for this primitive 54 | * ObjectInspector. 55 | */ 56 | @Override 57 | public Class getJavaPrimitiveClass() { 58 | return typeEntry.primitiveJavaClass; 59 | } 60 | 61 | /** 62 | * Return the associated primitive category for this primitive 63 | * ObjectInspector. 64 | */ 65 | @Override 66 | public PrimitiveCategory getPrimitiveCategory() { 67 | return typeEntry.primitiveCategory; 68 | } 69 | 70 | /** 71 | * Return the associated primitive Writable class for this primitive 72 | * ObjectInspector. 
73 | */ 74 | @Override 75 | public Class getPrimitiveWritableClass() { 76 | return typeEntry.primitiveWritableClass; 77 | } 78 | 79 | /** 80 | * Return the associated category this primitive ObjectInspector. 81 | */ 82 | @Override 83 | public Category getCategory() { 84 | return Category.PRIMITIVE; 85 | } 86 | 87 | /** 88 | * Return the type name for this primitive ObjectInspector. 89 | */ 90 | @Override 91 | public String getTypeName() { 92 | return typeEntry.typeName; 93 | } 94 | 95 | /** 96 | * The precision of the underlying data. 97 | */ 98 | @SuppressWarnings({"override", "UnusedDeclaration"}) // Hive 0.13 99 | public int precision() { 100 | return 0; 101 | } 102 | 103 | /** 104 | * The scale of the underlying data. 105 | */ 106 | @SuppressWarnings({"override", "UnusedDeclaration"}) // Hive 0.13 107 | public int scale() { 108 | return 0; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRow.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | import com.facebook.hive.orc.InStream; 28 | import com.facebook.hive.orc.OrcProto; 29 | import com.facebook.hive.orc.OrcStruct; 30 | import com.facebook.hive.orc.StreamName; 31 | import com.facebook.hive.orc.OrcProto.RowIndex; 32 | 33 | public class OrcLazyRow extends OrcLazyStruct { 34 | 35 | private OrcLazyObject[] fields; 36 | private final List fieldNames; 37 | 38 | public OrcLazyRow(OrcLazyObject[] fields, List fieldNames) { 39 | super(null); 40 | this.fields = fields; 41 | this.fieldNames = fieldNames; 42 | } 43 | 44 | @Override 45 | public void next() { 46 | super.next(); 47 | for (OrcLazyObject field : fields) { 48 | if (field != null) { 49 | field.next(); 50 | } 51 | } 52 | } 53 | 54 | @Override 55 | public void startStripe(Map streams, List encodings, 56 | RowIndex[] indexes, long rowBaseInStripe) throws IOException { 57 | for (OrcLazyObject field : fields) { 58 | if (field != null) { 59 | field.startStripe(streams, encodings, indexes, rowBaseInStripe); 60 | } 61 | } 62 | } 63 | 64 | @Override 65 | public Object materialize(long row, Object previous) throws IOException { 66 | OrcStruct previousRow; 67 | if (previous != null) { 68 | previousRow = (OrcStruct) previous; 69 | previousRow.setFieldNames(fieldNames); 70 | } else { 71 | previousRow = new OrcStruct(fieldNames); 72 | } 73 | for (int i = 0; i < fields.length; i++) { 74 | previousRow.setFieldValue(i, fields[i]); 75 | } 76 | return previousRow; 77 | } 78 | 79 | @Override 80 | public void seekToRow(long rowNumber) throws IOException { 81 | for (OrcLazyObject field : fields) { 82 | if (field != null) { 83 | field.seekToRow(rowNumber); 84 | } 85 | } 86 | } 87 | 88 | public int getNumFields() { 89 | return fields.length; 90 | } 91 | 92 | 
public OrcLazyObject getFieldValue(int index) { 93 | if (index >= fields.length) { 94 | return null; 95 | } 96 | 97 | return fields[index]; 98 | } 99 | 100 | public void reset(OrcLazyRow other) throws IOException { 101 | this.fields = other.getRawFields(); 102 | seekToRow(0); 103 | } 104 | 105 | public OrcLazyObject[] getRawFields() { 106 | return fields; 107 | } 108 | 109 | @Override 110 | public void close() throws IOException { 111 | for (OrcLazyObject field : fields) { 112 | if (field != null) { 113 | field.close(); 114 | } 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRowObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | 27 | import com.facebook.hive.orc.OrcProto; 28 | import com.facebook.hive.orc.OrcStruct.Field; 29 | import org.apache.hadoop.hive.serde2.objectinspector.StructField; 30 | import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; 31 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; 32 | 33 | public class OrcLazyRowObjectInspector extends OrcLazyStructObjectInspector { 34 | 35 | public OrcLazyRowObjectInspector(StructTypeInfo info) { 36 | super(info.getAllStructFieldNames().size()); 37 | ArrayList fieldNames = info.getAllStructFieldNames(); 38 | ArrayList fieldTypes = info.getAllStructFieldTypeInfos(); 39 | for(int i=0; i < fieldNames.size(); ++i) { 40 | fields.add(new Field(fieldNames.get(i), 41 | OrcLazyObjectInspectorUtils.createLazyObjectInspector(fieldTypes.get(i)), i)); 42 | } 43 | } 44 | 45 | public OrcLazyRowObjectInspector(int columnId, List types) { 46 | super(types.get(columnId).getSubtypesCount()); 47 | OrcProto.Type type = types.get(columnId); 48 | int fieldCount = type.getSubtypesCount(); 49 | for(int i=0; i < fieldCount; ++i) { 50 | int fieldType = type.getSubtypes(i); 51 | fields.add(new Field(type.getFieldNames(i), 52 | OrcLazyObjectInspectorUtils.createLazyObjectInspector(fieldType, types), i)); 53 | } 54 | } 55 | 56 | @Override 57 | public Object getStructFieldData(Object data, StructField fieldRef) { 58 | int offset = ((Field) fieldRef).getOffset(); 59 | 60 | try { 61 | OrcLazyObject obj = ((OrcLazyRow) data).getFieldValue(offset); 62 | if (obj != null) { 63 | obj.materialize(); 64 | return obj.nextIsNull() ? 
null : obj; 65 | } 66 | return null; 67 | } catch (IOException e) { 68 | throw new RuntimeException(e); 69 | } 70 | } 71 | 72 | @Override 73 | public StructField getStructFieldRef(String fieldName) { 74 | for(StructField field: fields) { 75 | if (field.getFieldName().equals(fieldName)) { 76 | return field; 77 | } 78 | } 79 | return null; 80 | } 81 | 82 | @Override 83 | public List getStructFieldsDataAsList(Object data) { 84 | OrcLazyRow row = (OrcLazyRow) data; 85 | int numFields = row.getNumFields(); 86 | List result = new ArrayList(numFields); 87 | for (int i = 0; i < numFields; i++) { 88 | try { 89 | OrcLazyObject obj = (OrcLazyObject) row.getFieldValue(i); 90 | result.add(obj == null || obj.nextIsNull() ? null : obj); 91 | } catch (IOException e) { 92 | throw new RuntimeException(e); 93 | } 94 | 95 | } 96 | return result; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyShort.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.io.ShortWritable; 24 | 25 | public class OrcLazyShort extends OrcLazyObject { 26 | 27 | public OrcLazyShort(LazyShortTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyShort(OrcLazyShort copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | previous = new ShortWritable(((ShortWritable)copy.previous).get()); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyShortObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.io.ShortWritable; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; 26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 28 | 29 | public class OrcLazyShortObjectInspector extends OrcLazyPrimitiveObjectInspector 30 | implements ShortObjectInspector { 31 | 32 | protected OrcLazyShortObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.shortTypeEntry); 34 | } 35 | 36 | @Override 37 | public short get(Object o) { 38 | return ((ShortWritable)getPrimitiveWritableObject(o)).get(); 39 | } 40 | 41 | @Override 42 | public Object copyObject(Object o) { 43 | return o == null ? null : new OrcLazyShort((OrcLazyShort) o); 44 | } 45 | 46 | @Override 47 | public Object getPrimitiveJavaObject(Object o) { 48 | ShortWritable writable = getPrimitiveWritableObject(o); 49 | return writable == null ? null : Short.valueOf(writable.get()); 50 | } 51 | 52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 53 | public PrimitiveTypeInfo getTypeInfo() { 54 | return (PrimitiveTypeInfo) TypeInfoFactory.shortTypeInfo; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyString.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. 
The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.io.Text; 24 | 25 | public class OrcLazyString extends OrcLazyObject { 26 | 27 | public OrcLazyString(LazyStringTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyString(OrcLazyString copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | previous = new Text((Text)copy.previous); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStringObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; 25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 27 | import org.apache.hadoop.io.Text; 28 | 29 | public class OrcLazyStringObjectInspector extends OrcLazyPrimitiveObjectInspector 30 | implements StringObjectInspector { 31 | 32 | protected OrcLazyStringObjectInspector() { 33 | super(PrimitiveObjectInspectorUtils.stringTypeEntry); 34 | } 35 | 36 | @Override 37 | public String getPrimitiveJavaObject(Object o) { 38 | Text text = getPrimitiveWritableObject(o); 39 | return text == null ? null : text.toString(); 40 | } 41 | 42 | @Override 43 | public Object copyObject(Object o) { 44 | return o == null ? null : new OrcLazyString((OrcLazyString) o); 45 | } 46 | 47 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 48 | public PrimitiveTypeInfo getTypeInfo() { 49 | return (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStruct.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 
2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | public class OrcLazyStruct extends OrcLazyObject { 24 | 25 | public OrcLazyStruct(LazyStructTreeReader treeReader) { 26 | super(treeReader); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestamp.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import org.apache.hadoop.hive.serde2.io.TimestampWritable; 24 | 25 | public class OrcLazyTimestamp extends OrcLazyObject { 26 | 27 | public OrcLazyTimestamp(LazyTimestampTreeReader treeReader) { 28 | super(treeReader); 29 | } 30 | 31 | public OrcLazyTimestamp(OrcLazyTimestamp copy) { 32 | super(copy); 33 | if (copy.previous != null) { 34 | previous = new TimestampWritable(((TimestampWritable)copy.previous)); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestampObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.sql.Timestamp; 24 | 25 | import org.apache.hadoop.hive.serde2.io.TimestampWritable; 26 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 27 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; 28 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; 29 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 30 | 31 | public class OrcLazyTimestampObjectInspector extends 32 | OrcLazyPrimitiveObjectInspector implements TimestampObjectInspector { 33 | 34 | protected OrcLazyTimestampObjectInspector() { 35 | super(PrimitiveObjectInspectorUtils.timestampTypeEntry); 36 | } 37 | 38 | @Override 39 | public Timestamp getPrimitiveJavaObject(Object o) { 40 | TimestampWritable writable = getPrimitiveWritableObject(o); 41 | return writable == null ? null : writable.getTimestamp(); 42 | } 43 | 44 | @Override 45 | public Object copyObject(Object o) { 46 | return o == null ? 
null : new OrcLazyTimestamp((OrcLazyTimestamp) o); 47 | } 48 | 49 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive 50 | public PrimitiveTypeInfo getTypeInfo() { 51 | return (PrimitiveTypeInfo) TypeInfoFactory.timestampTypeInfo; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyUnion.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | public class OrcLazyUnion extends OrcLazyObject { 24 | 25 | public OrcLazyUnion(LazyUnionTreeReader treeReader) { 26 | super(treeReader); 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyUnionObjectInspector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 
2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.facebook.hive.orc.lazy; 22 | 23 | import java.io.IOException; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | 27 | import com.facebook.hive.orc.OrcProto; 28 | import com.facebook.hive.orc.OrcUnion; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 30 | import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; 31 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; 32 | import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; 33 | 34 | public class OrcLazyUnionObjectInspector implements UnionObjectInspector { 35 | 36 | private final List children; 37 | 38 | public OrcLazyUnionObjectInspector(int columnId, List types) { 39 | OrcProto.Type type = types.get(columnId); 40 | children = new ArrayList(type.getSubtypesCount()); 41 | for(int i=0; i < type.getSubtypesCount(); ++i) { 42 | children.add(OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(i), 43 | types)); 44 | } 45 | } 46 | 47 | public OrcLazyUnionObjectInspector(UnionTypeInfo info) { 48 | List unionChildren = info.getAllUnionObjectTypeInfos(); 49 | 
this.children = new ArrayList(unionChildren.size()); 50 | for(TypeInfo child: info.getAllUnionObjectTypeInfos()) { 51 | this.children.add(OrcLazyObjectInspectorUtils.createWritableObjectInspector(child)); 52 | } 53 | } 54 | 55 | private OrcUnion get(Object o) { 56 | if (o == null) { 57 | return null; 58 | } 59 | 60 | try { 61 | return (OrcUnion) ((OrcLazyUnion) o).materialize(); 62 | } catch (IOException e) { 63 | throw new RuntimeException(e); 64 | } 65 | } 66 | 67 | @Override 68 | public Object getField(Object o) { 69 | if (o == null) { 70 | return null; 71 | } 72 | 73 | return get(o).getObject(); 74 | } 75 | 76 | @Override 77 | public List getObjectInspectors() { 78 | return children; 79 | } 80 | 81 | @Override 82 | public byte getTag(Object o) { 83 | if (o == null) { 84 | return -1; 85 | } 86 | 87 | return get(o).getTag(); 88 | } 89 | 90 | @Override 91 | public Category getCategory() { 92 | return Category.UNION; 93 | } 94 | 95 | @Override 96 | public String getTypeName() { 97 | StringBuilder builder = new StringBuilder("uniontype<"); 98 | boolean first = true; 99 | for(ObjectInspector child: children) { 100 | if (first) { 101 | first = false; 102 | } else { 103 | builder.append(","); 104 | } 105 | builder.append(child.getTypeName()); 106 | } 107 | builder.append(">"); 108 | return builder.toString(); 109 | } 110 | 111 | @Override 112 | public boolean equals(Object o) { 113 | if (o == null || o.getClass() != getClass()) { 114 | return false; 115 | } else if (o == this) { 116 | return true; 117 | } else { 118 | List other = ((OrcLazyUnionObjectInspector) o).children; 119 | if (other.size() != children.size()) { 120 | return false; 121 | } 122 | for(int i = 0; i < children.size(); ++i) { 123 | if (!other.get(i).equals(children.get(i))) { 124 | return false; 125 | } 126 | } 127 | return true; 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- 
/hive-dwrf/src/main/java/com/facebook/hive/orc/package-info.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | /** 22 | * The Optimized Row Columnar (ORC) File Format. 23 | * 24 | * This format: 25 | *
<ul>
 26 | *   <li>Decomposes complex column types into primitives</li>
 27 | *   <li>Uses type-specific encoders for each column
 28 | *     <ul>
 29 | *       <li>Dictionary encodings for low cardinality columns</li>
 30 | *       <li>Run length encoding of data</li>
 31 | *       <li>Variable length encoding of integers</li>
 32 | *     </ul>
 33 | *   </li>
 34 | *   <li>Divides file into large stripes</li>
 35 | *   <li>Each stripe includes light-weight indexes that enable the reader to
 36 | *     skip large sets of rows that don't satisfy the filter condition</li>
 37 | *   <li>A file footer that contains meta-information about the file
 38 | *     <ul>
 39 | *       <li>Precise byte range for each stripe</li>
 40 | *       <li>Type information for the file</li>
 41 | *       <li>Any user meta-information</li>
 42 | *     </ul>
 43 | *   </li>
 44 | *   <li>Seek to row number is implemented to support secondary indexes</li>
 45 | *   <li>Support for additional generic compression: LZO, SNAPPY, ZLIB.</li>
 46 | * </ul>
 47 | *
 48 | * <p>
 49 | * Format:
 50 | * <pre>
 51 | * {@code
 52 | * HEADER (3 bytes) "ORC"
 53 | * STRIPE (0 or more stripes)
 54 | * FILE-FOOTER
 55 | * POST SCRIPT
 56 | * PS LENGTH (1 byte)
 57 | * }
 58 | * </pre>
 59 | * </p>
 60 | *
 61 | * <p>
 62 | * Stripe:
 63 | * <pre>
 64 | * {@code
 65 | * INDEX-STREAM (0 or more)
 66 | * DATA-STREAM (0 or more)
 67 | * STRIPE-FOOTER
 68 | * }
 69 | * </pre>
 70 | * </p>
 71 | */ 72 | package com.facebook.hive.orc; 73 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/BooleanColumnStatistics.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc.statistics; 21 | 22 | /** 23 | * Statistics for boolean columns, in addition to the value count inherited from ColumnStatistics. 24 | */ 25 | public interface BooleanColumnStatistics extends ColumnStatistics { 26 | /** NOTE(review): presumably the number of false values recorded for the column; how null values are treated is not visible here - confirm against the implementation. */ long getFalseCount(); 27 | 28 | /** NOTE(review): presumably the number of true values recorded for the column; how null values are treated is not visible here - confirm against the implementation. */ long getTrueCount(); 29 | } 30 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/ColumnStatistics.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements.
/**
 * Base statistics contract shared by every column type.
 */
public interface ColumnStatistics {
  /**
   * Reports how many values the column holds. This count is not the row count:
   * NULL values and repeated values make the two differ.
   *
   * @return the number of values
   */
  long getNumberOfValues();
}
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc.statistics; 21 | 22 | /** 23 | * Statistics for float and double columns. 24 | */ 25 | public interface DoubleColumnStatistics extends ColumnStatistics { 26 | 27 | /** 28 | * Get the smallest value in the column. Only defined if getNumberOfValues 29 | * is non-zero. 30 | * @return the minimum 31 | */ 32 | double getMinimum(); 33 | 34 | /** 35 | * Get the largest value in the column. Only defined if getNumberOfValues 36 | * is non-zero. 37 | * @return the maximum 38 | */ 39 | double getMaximum(); 40 | 41 | /** 42 | * Get the sum of the values in the column. 43 | * @return the sum 44 | */ 45 | double getSum(); 46 | } 47 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/IntegerColumnStatistics.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc.statistics; 21 | 22 | /** 23 | * Statistics for all of the integer columns, such as byte, short, int, and 24 | * long. 25 | */ 26 | public interface IntegerColumnStatistics extends ColumnStatistics { 27 | /** 28 | * Get the smallest value in the column. Only defined if getNumberOfValues 29 | * is non-zero. 30 | * @return the minimum 31 | */ 32 | long getMinimum(); 33 | 34 | /** 35 | * Get the largest value in the column. Only defined if getNumberOfValues 36 | * is non-zero. 37 | * @return the maximum 38 | */ 39 | long getMaximum(); 40 | 41 | /** 42 | * Is the sum defined? If the sum overflowed the counter this will be false. 43 | * @return is the sum available 44 | */ 45 | boolean isSumDefined(); 46 | 47 | /** 48 | * Get the sum of the column. Only valid if isSumDefined returns true. 49 | * @return the sum of the column 50 | */ 51 | long getSum(); 52 | } 53 | -------------------------------------------------------------------------------- /hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/StringColumnStatistics.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. 
The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hive.orc.statistics;

/**
 * Summary statistics maintained for string columns.
 */
public interface StringColumnStatistics extends ColumnStatistics {

  /**
   * Returns the minimum string.
   * @return the minimum
   */
  String getMinimum();

  /**
   * Returns the maximum string.
   * @return the maximum
   */
  String getMaximum();
}
--------------------------------------------------------------------------------
/hive-dwrf/src/main/resources/protobuf/com/facebook/hive/orc/orc_proto.proto:
--------------------------------------------------------------------------------
package com.facebook.hive.orc;

// Type-specific statistics messages; every field in them is optional.
message IntegerStatistics {
  optional sint64 minimum = 1;
  optional sint64 maximum = 2;
  optional sint64 sum = 3;
}

message DoubleStatistics {
  optional double minimum = 1;
  optional double maximum = 2;
  optional double sum = 3;
}

message StringStatistics {
  optional string minimum = 1;
  optional string maximum = 2;
}

message BucketStatistics {
  repeated uint64 count = 1 [packed=true];
}

// Value count plus one optional sub-message per statistics type above.
message ColumnStatistics {
  optional uint64 numberOfValues = 1;
  optional IntegerStatistics intStatistics = 2;
  optional DoubleStatistics doubleStatistics = 3;
  optional StringStatistics stringStatistics = 4;
  optional BucketStatistics bucketStatistics = 5;
}

message RowIndexEntry {
  repeated uint64 positions = 1 [packed=true];
  optional ColumnStatistics statistics = 2;
}

message RowIndex {
  repeated RowIndexEntry entry = 1;
}

message Stream {
  // if you add new index stream kinds, you need to make sure to update
  // StreamName to ensure it is added to the stripe in the right area
  enum Kind {
    PRESENT = 0;
    DATA = 1;
    LENGTH = 2;
    DICTIONARY_DATA = 3;
    DICTIONARY_COUNT = 4;
    NANO_DATA = 5;
    ROW_INDEX = 6;
    IN_DICTIONARY = 7;
    STRIDE_DICTIONARY = 8;
    STRIDE_DICTIONARY_LENGTH = 9;
  }
  required Kind kind = 1;
  optional uint32 column = 2;
  optional uint64 length = 3;
  optional bool useVInts = 4 [default = true];
}

message ColumnEncoding {
  enum Kind {
    DIRECT = 0;
    DICTIONARY = 1;
  }
  required Kind kind = 1;
  optional uint32 dictionarySize = 2;
}

message StripeFooter {
  repeated Stream streams = 1;
  repeated ColumnEncoding columns = 2;
}

message Type {
  enum Kind {
    BOOLEAN = 0;
    BYTE = 1;
    SHORT = 2;
    INT = 3;
    LONG = 4;
    FLOAT = 5;
    DOUBLE = 6;
    STRING = 7;
    BINARY = 8;
    TIMESTAMP = 9;
    LIST = 10;
    MAP = 11;
    STRUCT = 12;
    UNION = 13;
  }
  required Kind kind = 1;
  repeated uint32 subtypes = 2 [packed=true];
  repeated string fieldNames = 3;
}

message StripeInformation {
  optional uint64 offset = 1;
  optional uint64 indexLength = 2;
  optional uint64 dataLength = 3;
  optional uint64 footerLength = 4;
  optional uint64 numberOfRows = 5;
  optional uint64 rawDataSize = 6;
}

message UserMetadataItem {
  required string name = 1;
  required bytes value = 2;
}

message Footer {
  optional uint64 headerLength = 1;
  optional uint64 contentLength = 2;
  repeated StripeInformation stripes = 3;
  repeated Type types = 4;
  repeated UserMetadataItem metadata = 5;
  optional uint64 numberOfRows = 6;
  repeated ColumnStatistics statistics = 7;
  optional uint32 rowIndexStride = 8;
  optional uint64 rawDataSize = 9;
}

enum CompressionKind {
  NONE = 0;
  ZLIB = 1;
  SNAPPY = 2;
  LZO = 3;
}

// Serialized length must be less than 255 bytes
message PostScript {
  optional uint64 footerLength = 1;
  optional CompressionKind compression = 2;
  optional uint64 compressionBlockSize = 3;
}
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/MemoryManagerWithForce.java:
--------------------------------------------------------------------------------
package com.facebook.hive.orc;

import org.apache.hadoop.conf.Configuration;

import java.io.IOException;

/**
 *
 * MemoryManagerWithForce.
 *
 * A MemoryManager subclass that lets callers push writers into low memory
 * mode on demand.
 * NOTE(review): the earlier description also mentioned forcing writers to
 * flush their stripes; no method doing that is declared in this class -
 * confirm whether that ability comes from MemoryManager itself.
 */
public class MemoryManagerWithForce extends MemoryManager {

  /**
   * Hands the configuration straight to the MemoryManager constructor.
   */
  MemoryManagerWithForce(Configuration conf) {
    super(conf);
  }

  /**
   * Calls enterLowMemoryMode() on the callback of every entry in writerList.
   *
   * @throws IOException declared because the calls made here may throw it
   */
  public void forceEnterLowMemoryMode() throws IOException {
    for (WriterInfo info : writerList.values()) {
      info.getCallback().enterLowMemoryMode();
    }
  }
}
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/TestColumnStatistics.java:
--------------------------------------------------------------------------------
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.facebook.hive.orc; 21 | 22 | import com.facebook.hive.orc.statistics.ColumnStatisticsImpl; 23 | import com.facebook.hive.orc.statistics.DoubleColumnStatistics; 24 | import com.facebook.hive.orc.statistics.IntegerColumnStatistics; 25 | import com.facebook.hive.orc.statistics.StringColumnStatistics; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 28 | import org.junit.Test; 29 | 30 | import static junit.framework.Assert.assertEquals; 31 | 32 | /** 33 | * Test ColumnStatisticsImpl for ORC. 
34 | */ 35 | public class TestColumnStatistics { 36 | 37 | @Test 38 | public void testIntegerStatisticsMerge() throws Exception { 39 | ObjectInspector inspector = 40 | PrimitiveObjectInspectorFactory.javaIntObjectInspector; 41 | 42 | ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector); 43 | ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector); 44 | stats1.updateInteger(10); 45 | stats1.updateInteger(10); 46 | stats2.updateInteger(1); 47 | stats2.updateInteger(1000); 48 | stats1.merge(stats2); 49 | IntegerColumnStatistics typed = (IntegerColumnStatistics) stats1; 50 | assertEquals(1, typed.getMinimum()); 51 | assertEquals(1000, typed.getMaximum()); 52 | } 53 | 54 | @Test 55 | public void testDoubleStatisticsMerge() throws Exception { 56 | ObjectInspector inspector = 57 | PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; 58 | 59 | ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector); 60 | ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector); 61 | stats1.updateDouble(10.0); 62 | stats1.updateDouble(100.0); 63 | stats2.updateDouble(1.0); 64 | stats2.updateDouble(1000.0); 65 | stats1.merge(stats2); 66 | DoubleColumnStatistics typed = (DoubleColumnStatistics) stats1; 67 | assertEquals(1.0, typed.getMinimum(), 0.001); 68 | assertEquals(1000.0, typed.getMaximum(), 0.001); 69 | } 70 | 71 | @Test 72 | public void testStringStatisticsMerge() throws Exception { 73 | ObjectInspector inspector = 74 | PrimitiveObjectInspectorFactory.javaStringObjectInspector; 75 | 76 | ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector); 77 | ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector); 78 | stats1.updateString("bob"); 79 | stats1.updateString("david"); 80 | stats1.updateString("charles"); 81 | stats2.updateString("anne"); 82 | stats2.updateString("erin"); 83 | stats1.merge(stats2); 84 | StringColumnStatistics strStats = (StringColumnStatistics) stats1; 85 | assertEquals("anne", 
strStats.getMinimum()); 86 | assertEquals("erin", strStats.getMaximum()); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /hive-dwrf/src/test/java/com/facebook/hive/orc/TestDynamicArray.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.facebook.hive.orc; 19 | 20 | import static org.junit.Assert.assertEquals; 21 | import static org.junit.Assert.assertTrue; 22 | 23 | import java.util.Random; 24 | 25 | import org.junit.Test; 26 | 27 | public class TestDynamicArray { 28 | 29 | @Test 30 | public void testByteArray() throws Exception { 31 | DynamicByteArray dba = new DynamicByteArray(10, new MemoryEstimate()); 32 | dba.add((byte) 0); 33 | dba.add((byte) 1); 34 | dba.set(3, (byte) 3); 35 | dba.set(2, (byte) 2); 36 | dba.add((byte) 4); 37 | assertEquals("{0,1,2,3,4}", dba.toString()); 38 | assertEquals(5, dba.size()); 39 | byte[] val; 40 | val = new byte[0]; 41 | 42 | assertEquals(0, dba.compare(val, 0, 0, 2, 0)); 43 | assertTrue(dba.compare(val, 0, 0, 2, 1) < 0); 44 | val = new byte[]{3,42}; 45 | assertTrue(dba.compare(val, 0, 1, 2, 0) > 0); 46 | assertTrue(dba.compare(val, 0, 1, 2, 1) > 0); 47 | assertEquals(0, dba.compare(val, 0, 1, 3, 1)); 48 | assertTrue(dba.compare(val, 0, 1, 3, 2) < 0); 49 | assertTrue(dba.compare(val, 0, 2, 3, 1) > 0); 50 | val = new byte[256]; 51 | for(int b=-128; b < 128; ++b) { 52 | dba.add((byte) b); 53 | val[b+128] = (byte) b; 54 | } 55 | assertEquals(0, dba.compare(val, 0, 256, 5, 256)); 56 | assertTrue(dba.compare(val, 0, 1, 0, 1) > 0); 57 | assertTrue(dba.compare(val, 254, 1, 0, 1) > 0); 58 | assertTrue(dba.compare(val, 120, 1, 64, 1) > 0); 59 | val = new byte[1024]; 60 | Random rand = new Random(1701); 61 | for(int i = 0; i < val.length; ++i) { 62 | rand.nextBytes(val); 63 | } 64 | dba.add(val, 0, 1024); 65 | assertEquals(1285, dba.size()); 66 | assertEquals(0, dba.compare(val, 0, 1024, 261, 1024)); 67 | } 68 | 69 | @Test 70 | public void testIntArray() throws Exception { 71 | DynamicIntArray dia = new DynamicIntArray(10, new MemoryEstimate()); 72 | for(int i=0; i < 10000; ++i) { 73 | dia.add(2*i); 74 | } 75 | assertEquals(10000, dia.size()); 76 | for(int i=0; i < 10000; ++i) { 77 | assertEquals(2*i, dia.get(i)); 78 | } 79 | dia.clear(); 80 | 
assertEquals(0, dia.size()); 81 | dia.add(3); 82 | dia.add(12); 83 | dia.add(65); 84 | assertEquals("{3,12,65}", dia.toString()); 85 | for(int i=0; i < 5; ++i) { 86 | dia.increment(i, 3); 87 | } 88 | assertEquals("{6,15,68,3,3}", dia.toString()); 89 | } 90 | 91 | @Test 92 | public void testLongArray() throws Exception { 93 | DynamicLongArray dia = new DynamicLongArray(10, new MemoryEstimate()); 94 | for(int i=0; i < 10000; ++i) { 95 | dia.add((long)(2*i) + (long)Integer.MAX_VALUE); 96 | } 97 | assertEquals(10000, dia.size()); 98 | for(int i=0; i < 10000; ++i) { 99 | assertEquals((long)(2*i) + (long)Integer.MAX_VALUE, dia.get(i)); 100 | } 101 | dia.clear(); 102 | assertEquals(0, dia.size()); 103 | dia.add(3); 104 | dia.add(12); 105 | dia.add(65); 106 | assertEquals("{3,12,65}", dia.toString()); 107 | for(int i=0; i < 5; ++i) { 108 | dia.increment(i, 3); 109 | } 110 | assertEquals("{6,15,68,3,3}", dia.toString()); 111 | 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /hive-dwrf/src/test/java/com/facebook/hive/orc/TestSerializationUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.facebook.hive.orc; 19 | 20 | import org.junit.Test; 21 | 22 | import java.io.ByteArrayInputStream; 23 | import java.io.ByteArrayOutputStream; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | public class TestSerializationUtils { 28 | 29 | @Test 30 | public void TestDoubles() throws Exception { 31 | ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 32 | SerializationUtils.writeDouble(buffer, 1343822337.759); 33 | assertEquals(1343822337.759, 34 | SerializationUtils.readDouble(new 35 | ByteArrayInputStream(buffer.toByteArray())), 0.0001); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hive-dwrf/src/test/java/com/facebook/hive/orc/TestStreamName.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.facebook.hive.orc; 20 | 21 | import static org.junit.Assert.assertEquals; 22 | 23 | import org.junit.Test; 24 | 25 | public class TestStreamName { 26 | 27 | @Test 28 | public void test1() throws Exception { 29 | StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA); 30 | StreamName s2 = new StreamName(3, 31 | OrcProto.Stream.Kind.DICTIONARY_DATA); 32 | StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA); 33 | StreamName s4 = new StreamName(5, 34 | OrcProto.Stream.Kind.DICTIONARY_DATA); 35 | StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA); 36 | assertEquals(true, s1.equals(s1)); 37 | assertEquals(false, s1.equals(s2)); 38 | assertEquals(false, s1.equals(s3)); 39 | assertEquals(true, s1.equals(s1p)); 40 | assertEquals(true, s1.compareTo(null) < 0); 41 | assertEquals(false, s1.equals(null)); 42 | assertEquals(true, s1.compareTo(s2) > 0); 43 | assertEquals(true, s2.compareTo(s3) < 0); 44 | assertEquals(true, s3.compareTo(s4) > 0); 45 | assertEquals(true, s4.compareTo(s1p) < 0); 46 | assertEquals(0, s1p.compareTo(s1)); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /hive-dwrf/src/test/java/com/facebook/hive/orc/TestZlib.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.hive.orc;

import com.facebook.hive.orc.compression.CompressionCodec;
import com.facebook.hive.orc.compression.ZlibCodec;
import org.junit.Test;

import java.io.IOException;
import java.nio.ByteBuffer;

import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.fail;

/**
 * Tests for ZlibCodec.
 */
public class TestZlib {

  /**
   * Compresses eight bytes into a ten-byte output buffer and expects
   * compress to return false.
   */
  @Test
  public void testNoOverflow() throws Exception {
    ByteBuffer source = ByteBuffer.allocate(10);
    ByteBuffer target = ByteBuffer.allocate(10);
    source.put(new byte[]{1,2,3,4,5,6,7,10});
    source.flip();

    CompressionCodec zlib = new ZlibCodec();
    assertEquals(false, zlib.compress(source, target, null));
  }

  /**
   * Passes six arbitrary bytes to decompress and expects an IOException.
   */
  @Test
  public void testCorrupt() throws Exception {
    ByteBuffer corrupt = ByteBuffer.allocate(1000);
    corrupt.put(new byte[]{127,-128,0,99,98,-1});
    corrupt.flip();

    CompressionCodec zlib = new ZlibCodec();
    ByteBuffer target = ByteBuffer.allocate(1000);
    try {
      zlib.decompress(corrupt, target);
      // Reaching this line means no exception was thrown.
      fail();
    } catch (IOException expected) {
      // EXPECTED
    }
  }
}
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/WriterImplWithForceFlush.java:
--------------------------------------------------------------------------------
package com.facebook.hive.orc;

import com.facebook.hive.orc.compression.CompressionKind;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 8 | 9 | import java.io.IOException; 10 | 11 | public class WriterImplWithForceFlush extends WriterImpl { 12 | public WriterImplWithForceFlush(FileSystem fs, Path path, Configuration conf, 13 | ObjectInspector inspector, long stripeSize, CompressionKind compress, int bufferSize, 14 | int rowIndexStride, MemoryManager memoryManager) throws IOException { 15 | super(fs, path, conf, inspector, stripeSize, compress, bufferSize, rowIndexStride, 16 | memoryManager); 17 | } 18 | 19 | public void forceFlushStripe() throws IOException { 20 | flushStripe(); 21 | } 22 | } 23 | --------------------------------------------------------------------------------