├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── hive-dwrf-shims
├── pom.xml
└── src
│ └── main
│ └── java
│ └── org
│ └── apache
│ └── hadoop
│ └── hive
│ ├── common
│ └── io
│ │ └── RawDatasizeConst.java
│ ├── ql
│ └── io
│ │ └── slice
│ │ ├── BasicSliceInput.java
│ │ ├── BasicSliceOutput.java
│ │ ├── InputStreamSliceInput.java
│ │ ├── OutputStreamSliceOutput.java
│ │ ├── RuntimeIOException.java
│ │ ├── SizeOf.java
│ │ ├── Slice.java
│ │ ├── SliceInput.java
│ │ ├── SliceOutput.java
│ │ └── Slices.java
│ └── serde2
│ ├── ReaderWriterProfiler.java
│ └── objectinspector
│ └── FBSettableUnionObjectInspector.java
├── hive-dwrf
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── facebook
│ │ │ └── hive
│ │ │ └── orc
│ │ │ ├── BitFieldReader.java
│ │ │ ├── BitFieldWriter.java
│ │ │ ├── DictionaryEncoder.java
│ │ │ ├── DynamicArray.java
│ │ │ ├── DynamicByteArray.java
│ │ │ ├── DynamicIntArray.java
│ │ │ ├── DynamicLongArray.java
│ │ │ ├── FileDump.java
│ │ │ ├── InStream.java
│ │ │ ├── IntDictionaryEncoder.java
│ │ │ ├── MemoryEstimate.java
│ │ │ ├── MemoryManager.java
│ │ │ ├── NotAnORCFileException.java
│ │ │ ├── OrcConf.java
│ │ │ ├── OrcFile.java
│ │ │ ├── OrcInputFormat.java
│ │ │ ├── OrcOutputFormat.java
│ │ │ ├── OrcSerde.java
│ │ │ ├── OrcStruct.java
│ │ │ ├── OrcUnion.java
│ │ │ ├── OutStream.java
│ │ │ ├── PositionRecorder.java
│ │ │ ├── PositionedOutputStream.java
│ │ │ ├── Reader.java
│ │ │ ├── ReaderImpl.java
│ │ │ ├── RecordReader.java
│ │ │ ├── RecordReaderImpl.java
│ │ │ ├── RunLengthByteReader.java
│ │ │ ├── RunLengthByteWriter.java
│ │ │ ├── RunLengthConstants.java
│ │ │ ├── RunLengthIntegerReader.java
│ │ │ ├── RunLengthIntegerWriter.java
│ │ │ ├── SerializationUtils.java
│ │ │ ├── StreamName.java
│ │ │ ├── StringDictionaryEncoder.java
│ │ │ ├── StripeInformation.java
│ │ │ ├── StripeReader.java
│ │ │ ├── Writer.java
│ │ │ ├── WriterImpl.java
│ │ │ ├── compression
│ │ │ ├── CompressionCodec.java
│ │ │ ├── CompressionKind.java
│ │ │ ├── SnappyCodec.java
│ │ │ └── ZlibCodec.java
│ │ │ ├── lazy
│ │ │ ├── LazyBinaryTreeReader.java
│ │ │ ├── LazyBooleanTreeReader.java
│ │ │ ├── LazyByteTreeReader.java
│ │ │ ├── LazyDoubleTreeReader.java
│ │ │ ├── LazyFloatTreeReader.java
│ │ │ ├── LazyIntDictionaryTreeReader.java
│ │ │ ├── LazyIntDirectTreeReader.java
│ │ │ ├── LazyIntTreeReader.java
│ │ │ ├── LazyListTreeReader.java
│ │ │ ├── LazyLongDictionaryTreeReader.java
│ │ │ ├── LazyLongDirectTreeReader.java
│ │ │ ├── LazyLongTreeReader.java
│ │ │ ├── LazyMapTreeReader.java
│ │ │ ├── LazyNumericDictionaryTreeReader.java
│ │ │ ├── LazyNumericDirectTreeReader.java
│ │ │ ├── LazyShortDictionaryTreeReader.java
│ │ │ ├── LazyShortDirectTreeReader.java
│ │ │ ├── LazyShortTreeReader.java
│ │ │ ├── LazyStringDictionaryTreeReader.java
│ │ │ ├── LazyStringDirectTreeReader.java
│ │ │ ├── LazyStringTreeReader.java
│ │ │ ├── LazyStructTreeReader.java
│ │ │ ├── LazyTimestampTreeReader.java
│ │ │ ├── LazyTreeReader.java
│ │ │ ├── LazyUnionTreeReader.java
│ │ │ ├── OrcLazyBinary.java
│ │ │ ├── OrcLazyBinaryObjectInspector.java
│ │ │ ├── OrcLazyBoolean.java
│ │ │ ├── OrcLazyBooleanObjectInspector.java
│ │ │ ├── OrcLazyByte.java
│ │ │ ├── OrcLazyByteObjectInspector.java
│ │ │ ├── OrcLazyDouble.java
│ │ │ ├── OrcLazyDoubleObjectInspector.java
│ │ │ ├── OrcLazyFloat.java
│ │ │ ├── OrcLazyFloatObjectInspector.java
│ │ │ ├── OrcLazyInt.java
│ │ │ ├── OrcLazyIntObjectInspector.java
│ │ │ ├── OrcLazyList.java
│ │ │ ├── OrcLazyListObjectInspector.java
│ │ │ ├── OrcLazyLong.java
│ │ │ ├── OrcLazyLongObjectInspector.java
│ │ │ ├── OrcLazyMap.java
│ │ │ ├── OrcLazyMapObjectInspector.java
│ │ │ ├── OrcLazyObject.java
│ │ │ ├── OrcLazyObjectInspectorUtils.java
│ │ │ ├── OrcLazyPrimitiveObjectInspector.java
│ │ │ ├── OrcLazyRow.java
│ │ │ ├── OrcLazyRowObjectInspector.java
│ │ │ ├── OrcLazyShort.java
│ │ │ ├── OrcLazyShortObjectInspector.java
│ │ │ ├── OrcLazyString.java
│ │ │ ├── OrcLazyStringObjectInspector.java
│ │ │ ├── OrcLazyStruct.java
│ │ │ ├── OrcLazyStructObjectInspector.java
│ │ │ ├── OrcLazyTimestamp.java
│ │ │ ├── OrcLazyTimestampObjectInspector.java
│ │ │ ├── OrcLazyUnion.java
│ │ │ └── OrcLazyUnionObjectInspector.java
│ │ │ ├── package-info.java
│ │ │ └── statistics
│ │ │ ├── BooleanColumnStatistics.java
│ │ │ ├── ColumnStatistics.java
│ │ │ ├── ColumnStatisticsImpl.java
│ │ │ ├── DoubleColumnStatistics.java
│ │ │ ├── IntegerColumnStatistics.java
│ │ │ └── StringColumnStatistics.java
│ └── resources
│ │ └── protobuf
│ │ └── com
│ │ └── facebook
│ │ └── hive
│ │ └── orc
│ │ └── orc_proto.proto
│ └── test
│ ├── java
│ └── com
│ │ └── facebook
│ │ └── hive
│ │ └── orc
│ │ ├── MemoryManagerWithForce.java
│ │ ├── OrcTestUtils.java
│ │ ├── TestBitFieldReader.java
│ │ ├── TestColumnStatistics.java
│ │ ├── TestDynamicArray.java
│ │ ├── TestFileDump.java
│ │ ├── TestInStream.java
│ │ ├── TestInputOutputFormat.java
│ │ ├── TestIntDictionaryEncoder.java
│ │ ├── TestMapTreeWriter.java
│ │ ├── TestMemoryManager.java
│ │ ├── TestObjectInspector.java
│ │ ├── TestOrcFile.java
│ │ ├── TestOrcStruct.java
│ │ ├── TestRecordReaderImpl.java
│ │ ├── TestRunLengthByteReader.java
│ │ ├── TestRunLengthIntegerReader.java
│ │ ├── TestSerializationUtils.java
│ │ ├── TestStreamName.java
│ │ ├── TestStringDictionaryEncoder.java
│ │ ├── TestZlib.java
│ │ └── WriterImplWithForceFlush.java
│ └── resources
│ ├── orc-file-dump-dictionary-threshold-unsorted.out
│ ├── orc-file-dump-dictionary-threshold-unsorted2.out
│ ├── orc-file-dump-dictionary-threshold.out
│ ├── orc-file-dump-entropy-threshold.out
│ └── orc-file-dump.out
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | *.ipr
3 | *.iws
4 | target/
5 | /var
6 | /*/var/
7 | pom.xml.versionsBackup
8 | test-output/
9 | /atlassian-ide-plugin.xml
10 | .idea
11 | .DS_Store
12 | .classpath
13 | .settings
14 | .project
15 | temp-testng-customsuite.xml
16 | test-output
17 | .externalToolBuilders
18 | *~
19 |
20 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to DWRF
2 |
3 | We are currently not accepting pull requests. DWRF is a fork of the ORC file format, which is part of the Apache Hive project; please submit your contributions there.
4 |
5 | http://hive.apache.org
6 |
7 | https://issues.apache.org/jira/browse/HIVE
8 |
9 | https://cwiki.apache.org/confluence/display/Hive/LanguageManual+ORC
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | - - -
2 |
3 | **_This project is not actively maintained. Proceed at your own risk!_**
4 |
5 | - - -
6 |
7 | hive-dwrf
8 | =========
9 |
10 | DWRF file format for Hive
11 |
12 |
13 | This is a fork of the ORC file format developed as part of the Apache Hive project https://cwiki.apache.org/confluence/display/Hive/LanguageManual+ORC
14 |
15 | We are not currently accepting pull requests; please submit your changes to the Apache Hive project.
16 |
--------------------------------------------------------------------------------
/hive-dwrf-shims/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
17 |
18 | 4.0.0
19 |
20 |
21 | com.facebook.hive
22 | hive-dwrf-root
23 | 0.18.10-SNAPSHOT
24 |
25 |
26 | hive-dwrf-shims
27 |
28 |
29 | ${project.parent.basedir}
30 |
31 |
32 |
33 |
34 | com.facebook.presto.hive
35 | hive-apache
36 | true
37 |
38 |
39 |
40 | com.facebook.presto.hadoop
41 | hadoop-cdh4
42 | true
43 |
44 |
45 |
46 | com.google.guava
47 | guava
48 |
49 |
50 |
51 | commons-logging
52 | commons-logging
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/hive-dwrf-shims/src/main/java/org/apache/hadoop/hive/common/io/RawDatasizeConst.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package org.apache.hadoop.hive.common.io;
22 |
/**
 * Constants used by the raw data size calculation.
 *
 * <p>Shared between the RC file format and the ORC file format.
 *
 * <p>Variable-length types have no constant here: for a string the raw data
 * size is its length in bytes, and for a binary value it is the length of the
 * BytesWritable, i.e. the number of bytes.
 */
public final class RawDatasizeConst {

  // Fixed sizes of null and of the primitive types.
  public static final short NULL_SIZE = 1;
  public static final short BOOLEAN_SIZE = 1;
  public static final short BYTE_SIZE = 1;
  public static final short SHORT_SIZE = 2;
  public static final short INT_SIZE = 4;
  public static final short LONG_SIZE = 8;
  public static final short FLOAT_SIZE = 4;
  public static final short DOUBLE_SIZE = 8;

  /**
   * Raw data size of a timestamp: the bytes needed to store the milliseconds
   * since the epoch (8, since it is a long) plus the bytes needed to store the
   * nanos field (4, since it is an int).
   */
  public static final short TIMESTAMP_SIZE = 12;

  /**
   * Size of a union's tag. The raw data size of a UNION is the size of the
   * tag (1) plus the size of the value.
   */
  public static final short UNION_TAG_SIZE = 1;

  /**
   * Size of the scale part of a decimal.
   *
   * <p>ORC calculates decimals in HIVE-4120 in this way:
   *
   * <pre>{@code
   * @Override
   * void write(Object obj) throws IOException {
   *   super.write(obj);
   *   if (obj != null) {
   *     BigDecimal decimal = ((BigDecimalObjectInspector) inspector).
   *         getPrimitiveJavaObject(obj);
   *     SerializationUtils.writeBigInteger(valueStream,
   *         decimal.unscaledValue());
   *     scaleStream.write(decimal.scale());
   *     indexStatistics.updateDecimal(decimal);
   *   }
   * }
   * }</pre>
   *
   * <p>The raw data size calculation can simulate this as
   * {@code 4 + ceiling(log2(n)/8.0)} bytes:
   * <ol>
   *   <li>scale - int - 4 bytes</li>
   *   <li>BigInteger size: {@code ceiling(log2(n)/8.0)} bytes</li>
   * </ol>
   */
  public static final short BIGDECIMAL_SCALE_SIZE = 4;

}
90 |
--------------------------------------------------------------------------------
/hive-dwrf-shims/src/main/java/org/apache/hadoop/hive/ql/io/slice/RuntimeIOException.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package org.apache.hadoop.hive.ql.io.slice;
22 |
23 | import java.io.IOException;
24 |
/**
 * Unchecked exception that carries an {@link IOException} as its cause.
 */
public class RuntimeIOException extends RuntimeException {

  /**
   * Wraps the given I/O failure.
   *
   * @param cause the IOException to carry as this exception's cause
   */
  public RuntimeIOException(IOException cause) {
    super(cause);
  }
}
33 |
--------------------------------------------------------------------------------
/hive-dwrf-shims/src/main/java/org/apache/hadoop/hive/serde2/objectinspector/FBSettableUnionObjectInspector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package org.apache.hadoop.hive.serde2.objectinspector;
19 |
20 | /**
21 | * FBSettableUnionObjectInspector.
22 | *
23 | */
24 | public interface FBSettableUnionObjectInspector extends UnionObjectInspector {
25 |
26 | /**
27 | * Create a union object with tag 0 and object null
28 | */
29 | public abstract Object create();
30 |
31 | /**
32 | * Update the union with the tag and the object
33 | */
34 | public abstract void setField(Object union, byte tag, Object field);
35 | }
36 |
--------------------------------------------------------------------------------
/hive-dwrf/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
17 |
18 | 4.0.0
19 |
20 |
21 | com.facebook.hive
22 | hive-dwrf-root
23 | 0.18.10-SNAPSHOT
24 |
25 |
26 | hive-dwrf
27 |
28 |
29 | ${project.parent.basedir}
30 |
31 |
32 |
33 |
34 | com.facebook.hive
35 | hive-dwrf-shims
36 |
37 |
38 |
39 | com.facebook.presto.hadoop
40 | hadoop-cdh4
41 | true
42 |
43 |
44 |
45 | com.facebook.presto.hive
46 | hive-apache
47 | true
48 |
49 |
50 |
51 | com.google.protobuf
52 | protobuf-java
53 |
54 |
55 |
56 | commons-logging
57 | commons-logging
58 |
59 |
60 |
61 | it.unimi.dsi
62 | fastutil
63 |
64 |
65 |
66 | org.iq80.snappy
67 | snappy
68 |
69 |
70 |
71 | com.google.guava
72 | guava
73 |
74 |
75 |
76 |
77 | junit
78 | junit-dep
79 | test
80 |
81 |
82 |
83 | org.mockito
84 | mockito-core
85 | test
86 |
87 |
88 |
89 | org.hamcrest
90 | hamcrest-core
91 | test
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 | maven-antrun-plugin
100 |
101 |
102 | generate-sources
103 | generate-sources
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 | target/generated-sources/protobuf/gen-java
114 |
115 |
116 | run
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/BitFieldReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc;
21 |
22 | import java.io.EOFException;
23 | import java.io.IOException;
24 | import java.util.List;
25 |
26 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
27 |
28 | public class BitFieldReader {
29 | private final RunLengthByteReader input;
30 | private int current;
31 | private int bitsLeft;
32 | // The number of consumed bytes at each index stride
33 | private int[] indeces;
34 |
35 | public BitFieldReader(InStream input) throws IOException {
36 | this.input = new RunLengthByteReader(input);
37 | }
38 |
39 | private void readByte() throws IOException {
40 | if (input.hasNext()) {
41 | current = 0xff & input.next();
42 | bitsLeft = 8;
43 | } else {
44 | throw new EOFException("Read past end of bit field from " + input);
45 | }
46 | }
47 |
48 | public int next() throws IOException {
49 | int result = 0;
50 |
51 | if (bitsLeft == 0) {
52 | readByte();
53 | }
54 |
55 | bitsLeft--;
56 | result |= (current >>> bitsLeft) & 1;
57 |
58 | return result & 1;
59 | }
60 |
61 | public void seek(int index) throws IOException {
62 | input.seek(index);
63 | int consumed = (int) indeces[index];
64 | if (consumed > 8) {
65 | throw new IllegalArgumentException("Seek past end of byte at " +
66 | consumed + " in " + input);
67 | } else if (consumed != 0) {
68 | readByte();
69 | bitsLeft = 8 - consumed;
70 | } else {
71 | bitsLeft = 0;
72 | }
73 | }
74 |
75 | /**
76 | * Read in the number of bytes consumed at each index entry and store it,
77 | * also call loadIndeces on child stream and return the index of the next
78 | * streams indexes.
79 | */
80 | public int loadIndeces(List rowIndexEntries, int startIndex) {
81 | int updatedStartIndex = input.loadIndeces(rowIndexEntries, startIndex);
82 |
83 | int numIndeces = rowIndexEntries.size();
84 | indeces = new int[numIndeces + 1];
85 | int i = 0;
86 | for (RowIndexEntry rowIndexEntry : rowIndexEntries) {
87 | indeces[i] = (int) rowIndexEntry.getPositions(updatedStartIndex);
88 | i++;
89 | }
90 | return updatedStartIndex + 1;
91 | }
92 |
93 | public void skip(long items) throws IOException {
94 | long totalBits = items;
95 | if (bitsLeft >= totalBits) {
96 | bitsLeft -= totalBits;
97 | } else {
98 | totalBits -= bitsLeft;
99 | input.skip(totalBits / 8);
100 | bitsLeft = (int) (8 - (totalBits % 8));
101 |
102 | // Load the next value only if the stream still has data. If not,
103 | // then mark bitsLeft as zero to force exception when values are
104 | // attempted to be read.
105 | if (input.hasNext()) {
106 | current = input.next();
107 | } else {
108 | bitsLeft = 0;
109 | }
110 | }
111 | }
112 |
113 | public void close() throws IOException {
114 | input.close();
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/BitFieldWriter.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc;
21 |
22 | import java.io.IOException;
23 |
24 | class BitFieldWriter {
25 | private RunLengthByteWriter output;
26 | private final int bitSize;
27 | private byte current = 0;
28 | private int bitsLeft = 8;
29 |
30 | BitFieldWriter(PositionedOutputStream output,
31 | int bitSize) throws IOException {
32 | this.output = new RunLengthByteWriter(output);
33 | this.bitSize = bitSize;
34 | }
35 |
36 | private void writeByte() throws IOException {
37 | output.write(current);
38 | current = 0;
39 | bitsLeft = 8;
40 | }
41 |
42 | void flush() throws IOException {
43 | if (bitsLeft != 8) {
44 | writeByte();
45 | }
46 | output.flush();
47 | }
48 |
49 | void write(int value) throws IOException {
50 | int bitsToWrite = bitSize;
51 | while (bitsToWrite > bitsLeft) {
52 | // add the bits to the bottom of the current word
53 | current |= value >>> (bitsToWrite - bitsLeft);
54 | // subtract out the bits we just added
55 | bitsToWrite -= bitsLeft;
56 | // zero out the bits above bitsToWrite
57 | value &= (1 << bitsToWrite) - 1;
58 | writeByte();
59 | }
60 | bitsLeft -= bitsToWrite;
61 | current |= value << bitsLeft;
62 | if (bitsLeft == 0) {
63 | writeByte();
64 | }
65 | }
66 |
67 | void getPosition(PositionRecorder recorder) throws IOException {
68 | output.getPosition(recorder);
69 | recorder.addPosition(8 - bitsLeft);
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/DictionaryEncoder.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc;
22 |
23 | import java.io.IOException;
24 | import java.io.OutputStream;
25 |
26 | /**
27 | * A memory efficient red-black tree that does not allocate any objects per
28 | * an element. This class is abstract and assumes that the child class
29 | * handles the key and comparisons with the key.
30 | */
31 | abstract class DictionaryEncoder {
32 | protected final boolean sortKeys;
33 | protected final MemoryEstimate memoryEstimate;
34 |
35 | public DictionaryEncoder(boolean sortKeys, MemoryEstimate memoryEstimate) {
36 | this.sortKeys = sortKeys;
37 | this.memoryEstimate = memoryEstimate;
38 | }
39 |
40 | /**
41 | * Compare the value at the given position to the new value.
42 | * @return 0 if the values are the same, -1 if the new value is smaller and
43 | * 1 if the new value is larger.
44 | */
45 | protected abstract int compareValue(int position);
46 |
47 | /**
48 | * Get the number of elements in the set.
49 | */
50 | public abstract int size();
51 |
52 | /**
53 | * Reset the table to empty.
54 | */
55 | public abstract void clear();
56 |
57 | /**
58 | * Returns the amount of memory used by entries in the dictionary
59 | */
60 | public abstract int getUncompressedLength();
61 |
62 | /**
63 | * The interface for visitors.
64 | */
65 | public interface Visitor {
66 | /**
67 | * Called once for each node of the tree in sort order.
68 | * @param context the information about each node
69 | * @throws IOException
70 | */
71 | void visit(VisitorContext context) throws IOException;
72 | }
73 |
74 | /**
75 | * The information about each node.
76 | */
77 | public interface VisitorContext {
78 | /**
79 | * Get the position where the key was originally added.
80 | * @return the number returned by add.
81 | */
82 | int getOriginalPosition();
83 |
84 | /**
85 | * Write the bytes for the string to the given output stream.
86 | * @param out the stream to write to.
87 | * @throws IOException
88 | */
89 | void writeBytes(OutputStream out) throws IOException;
90 |
91 | /**
92 | * Get the number of bytes
93 | * @return the string's length in bytes
94 | */
95 | int getLength();
96 |
97 | T getKey();
98 |
99 | /**
100 | * Get the number of times this key was added to the dictionary
101 | * @return the number of times this key was added to the dictionary
102 | */
103 | int getCount();
104 |
105 | /**
106 | * Get the first index stride when this key was added to the dictionary
107 | * @return the first index stride when this key was added to the dictionary
108 | */
109 | int getIndexStride();
110 | }
111 | }
112 |
113 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/DynamicArray.java:
--------------------------------------------------------------------------------
1 | package com.facebook.hive.orc;
2 |
3 | import org.apache.hadoop.hive.ql.io.slice.SizeOf;
4 | import org.apache.hadoop.hive.ql.io.slice.Slice;
5 | import org.apache.hadoop.hive.ql.io.slice.Slices;
6 |
7 | public class DynamicArray {
8 |
9 | protected Slice data; // the real data
10 | protected int length = 0; // max set element index +1
11 | private final MemoryEstimate memoryEstimate;
12 | private final int literalSize;
13 | private final int defaultSize;
14 |
15 | protected DynamicArray(int size, MemoryEstimate memoryEstimate, int literalSize,
16 | int defaultSize) {
17 | if (size <= 0) {
18 | throw new IllegalArgumentException("bad chunksize");
19 | }
20 | this.memoryEstimate = memoryEstimate;
21 | this.literalSize = literalSize;
22 | this.defaultSize = defaultSize;
23 | setData(Slices.allocate(size));
24 | }
25 |
26 | protected void setData(Slice newData) {
27 | memoryEstimate.decrementTotalMemory(data == null ? 0 : data.length());
28 | data = newData;
29 | memoryEstimate.incrementTotalMemory(data.length());
30 |
31 | }
32 |
33 | /**
34 | * Ensure that the given index is valid.
35 | */
36 | protected void grow(int index) {
37 | if ((index * literalSize) + (literalSize - 1) >= data.length()) {
38 | int newSize = Math.max((index * literalSize) + defaultSize, 2 * data.length());
39 | Slice newSlice = Slices.allocate(newSize);
40 | newSlice.setBytes(0, data);
41 | setData(newSlice);
42 | }
43 | }
44 |
45 | /**
46 | * Get the size of the array.
47 | * @return the number of bytes in the array
48 | */
49 | public int size() {
50 | return length;
51 | }
52 |
53 | /**
54 | * Clear the array to its original pristine state.
55 | */
56 | public void clear() {
57 | length = 0;
58 | setData(Slices.allocate(defaultSize));
59 | }
60 |
61 | public long getSizeInBytes() {
62 | return data.length();
63 | }
64 |
65 | // A cleanup method that should be called before allowing the object to leave scope
66 | public void cleanup() {
67 | memoryEstimate.decrementTotalMemory(data.length());
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/DynamicIntArray.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc;
21 |
22 | import org.apache.hadoop.hive.ql.io.slice.SizeOf;
23 | import org.apache.hadoop.hive.ql.io.slice.Slice;
24 | import org.apache.hadoop.hive.ql.io.slice.Slices;
25 |
26 | /**
27 | * Dynamic int array that uses primitive types and chunks to avoid copying
28 | * large number of integers when it resizes.
29 | *
30 | * The motivation for this class is memory optimization, i.e. space efficient
31 | * storage of potentially huge arrays without good a-priori size guesses.
32 | *
33 | * The API of this class is between a primitive array and a AbstractList. It's
34 | * not a Collection implementation because it handles primitive types, but the
35 | * API could be extended to support iterators and the like.
36 | *
37 | * NOTE: Like standard Collection implementations/arrays, this class is not
38 | * synchronized.
39 | */
40 | final class DynamicIntArray extends DynamicArray {
41 | static final int DEFAULT_SIZE = SizeOf.SIZE_OF_INT * 8 * 1024;
42 |
43 | public DynamicIntArray(MemoryEstimate memoryEstimate) {
44 | this(DEFAULT_SIZE, memoryEstimate);
45 | }
46 |
47 | public DynamicIntArray(int size, MemoryEstimate memoryEstimate) {
48 | super(size, memoryEstimate, SizeOf.SIZE_OF_INT, DEFAULT_SIZE);
49 | }
50 |
51 | public int get(int index) {
52 | if (index >= length) {
53 | throw new IndexOutOfBoundsException("Index " + index +
54 | " is outside of 0.." +
55 | (length - 1));
56 | }
57 |
58 | return data.getInt(index * SizeOf.SIZE_OF_INT);
59 | }
60 |
61 | public void set(int index, int value) {
62 | grow(index);
63 | if (index >= length) {
64 | length = index + 1;
65 | }
66 |
67 | data.setInt(index * SizeOf.SIZE_OF_INT, value);
68 | }
69 |
70 | public void increment(int index, int value) {
71 | grow(index);
72 | if (index >= length) {
73 | length = index + 1;
74 | }
75 | data.setInt(index * SizeOf.SIZE_OF_INT, data.getInt(index * SizeOf.SIZE_OF_INT) + value);
76 | }
77 |
78 | public void add(int value) {
79 | grow(length);
80 | data.setInt(length * SizeOf.SIZE_OF_INT, value);
81 | length += 1;
82 | }
83 |
84 | @Override
85 | public String toString() {
86 | int i;
87 | StringBuilder sb = new StringBuilder(length * 4);
88 |
89 | sb.append('{');
90 | int l = length - 1;
91 | for (i=0; i= length) {
53 | throw new IndexOutOfBoundsException("Index " + index +
54 | " is outside of 0.." +
55 | (length - 1));
56 | }
57 |
58 | return data.getLong(index * SizeOf.SIZE_OF_LONG);
59 | }
60 |
61 | public void set(int index, long value) {
62 | grow(index);
63 | if (index >= length) {
64 | length = index + 1;
65 | }
66 |
67 | data.setLong(index * SizeOf.SIZE_OF_LONG, value);
68 | }
69 |
70 | public void increment(int index, long value) {
71 | grow(index);
72 | if (index >= length) {
73 | length = index + 1;
74 | }
75 |
76 | data.setLong(index * SizeOf.SIZE_OF_LONG, data.getLong(index * SizeOf.SIZE_OF_LONG) + value);
77 | }
78 |
79 | public void add(long value) {
80 | grow(length);
81 | data.setLong(length * SizeOf.SIZE_OF_LONG, value);
82 | length += 1;
83 | }
84 |
85 | @Override
86 | public String toString() {
87 | int i;
88 | StringBuilder sb = new StringBuilder(length * 4);
89 |
90 | sb.append('{');
91 | int l = length - 1;
92 | for (i=0; i {
29 | private final int column;
30 | private final OrcProto.Stream.Kind kind;
31 |
32 | public static enum Area {
33 | DATA, DICTIONARY, INDEX
34 | }
35 |
36 | public StreamName(int column, OrcProto.Stream.Kind kind) {
37 | this.column = column;
38 | this.kind = kind;
39 | }
40 |
41 | @Override
42 | public boolean equals(Object obj) {
43 | if (obj != null && obj instanceof StreamName) {
44 | StreamName other = (StreamName) obj;
45 | return other.column == column && other.kind == kind;
46 | } else {
47 | return false;
48 | }
49 | }
50 |
51 | @Override
52 | public int compareTo(StreamName streamName) {
53 | if (streamName == null) {
54 | return -1;
55 | }
56 | Area area = getArea(kind);
57 | Area otherArea = streamName.getArea(streamName.kind);
58 | if (area != otherArea) {
59 | return -area.compareTo(otherArea);
60 | }
61 | if (column != streamName.column) {
62 | return column < streamName.column ? -1 : 1;
63 | }
64 | return compareKinds(kind, streamName.kind);
65 | }
66 |
67 | // LENGTH is greater than DATA at the moment, but when we read the data we always read length
68 | // first (because you have to know how much data to read). Since this is an enum, we're kind of
69 | // stuck with it, this is just a hack to work around that.
70 | private int compareKinds(Kind kind1, Kind kind2) {
71 | if (kind1 == Kind.LENGTH && kind2 == Kind.DATA) {
72 | return -1;
73 | }
74 |
75 | if (kind1 == Kind.DATA && kind2 == Kind.LENGTH) {
76 | return 1;
77 | }
78 |
79 | return kind1.compareTo(kind2);
80 | }
81 |
82 | public int getColumn() {
83 | return column;
84 | }
85 |
86 | public OrcProto.Stream.Kind getKind() {
87 | return kind;
88 | }
89 |
90 | public Area getArea() {
91 | return getArea(kind);
92 | }
93 |
94 | public static Area getArea(OrcProto.Stream.Kind kind) {
95 | switch (kind) {
96 | case ROW_INDEX:
97 | case DICTIONARY_COUNT:
98 | return Area.INDEX;
99 | case DICTIONARY_DATA:
100 | return Area.DICTIONARY;
101 | default:
102 | return Area.DATA;
103 | }
104 | }
105 |
106 | @Override
107 | public String toString() {
108 | return "Stream for column " + column + " kind " + kind;
109 | }
110 |
111 | @Override
112 | public int hashCode() {
113 | return column * 101 + kind.getNumber();
114 | }
115 | }
116 |
117 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/StripeInformation.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc;
21 |
22 | /**
23 | * Information about the stripes in an ORC file that is provided by the Reader.
24 | */
public interface StripeInformation {
  /**
   * Where the stripe begins.
   * @return the stripe's starting byte offset, counted from the start of the file
   */
  long getOffset();

  /**
   * Size of the stripe's indexes.
   * @return the index length in bytes
   */
  long getIndexLength();

  /**
   * Size of the stripe's data.
   * @return the data length in bytes
   */
  long getDataLength();

  /**
   * Size of the stripe's tail (footer) section.
   * @return the tail length in bytes
   */
  long getFooterLength();

  /**
   * How many rows the stripe holds.
   * @return the row count
   */
  long getNumberOfRows();

  /**
   * Raw size of the data held in the stripe.
   * @return the raw data size in bytes
   */
  long getRawDataSize();
}
62 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/StripeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc;
21 |
22 | import java.io.IOException;
23 | import java.util.ArrayList;
24 | import java.util.List;
25 |
26 | import org.apache.hadoop.fs.FSDataInputStream;
27 | import org.apache.hadoop.fs.FileSystem;
28 | import org.apache.hadoop.fs.Path;
29 | import com.facebook.hive.orc.OrcFile.KeyWrapper;
30 | import com.facebook.hive.orc.OrcFile.ValueWrapper;
31 |
32 | public class StripeReader {
33 | private final FSDataInputStream file;
34 | private final List stripes =
35 | new ArrayList();
36 | private StripeInformation currentStripe;
37 | private byte[] currentData;
38 | private int stripesRead = 0;
39 |
40 | StripeReader(Iterable stripes,
41 | FileSystem fileSystem,
42 | Path path,
43 | long offset, long length
44 | ) throws IOException {
45 | this.file = fileSystem.open(path);
46 | for(StripeInformation stripe: stripes) {
47 | long stripeStart = stripe.getOffset();
48 | if (stripeStart >= offset && stripeStart < offset + length) {
49 | this.stripes.add(stripe);
50 | }
51 | }
52 | }
53 |
54 | private void readStripe() throws IOException {
55 | currentStripe = stripes.get(stripesRead);
56 | currentData = new byte[(int) (currentStripe.getIndexLength() + currentStripe.getDataLength() +
57 | currentStripe.getFooterLength())];
58 | file.seek(currentStripe.getOffset());
59 | file.readFully(currentData, 0, currentData.length);
60 | }
61 |
62 | public boolean hasNext() throws IOException {
63 | return stripesRead < stripes.size();
64 | }
65 |
66 | public boolean nextStripe(KeyWrapper keyWrapper, ValueWrapper valueWrapper) throws IOException {
67 | if (hasNext()) {
68 | readStripe();
69 | keyWrapper.key = currentStripe;
70 | valueWrapper.value = currentData;
71 | stripesRead += 1;
72 | }
73 | return hasNext();
74 | }
75 |
76 | public void close() throws IOException {
77 | file.close();
78 | }
79 |
80 | public long getPosition() throws IOException {
81 | return file.getPos();
82 | }
83 |
84 | /**
85 | * Return the fraction of stripes that have been read from the selected.
86 | * section of the file
87 | * @return fraction between 0.0 and 1.0 of stripes consumed
88 | */
89 | public float getProgress() {
90 | return ((float) stripesRead) / stripes.size();
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/Writer.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc;
22 |
23 | import java.io.IOException;
24 | import java.nio.ByteBuffer;
25 |
26 | /**
27 | * The interface for writing ORC files.
28 | */
public interface Writer {
  /**
   * Attach an arbitrary piece of metadata to the ORC file. Allowed at any time
   * before the Writer is closed. Passing a key that was used before replaces the
   * earlier value with the new one.
   * @param key the label for the metadata.
   * @param value the metadata contents.
   */
  void addUserMetadata(String key, ByteBuffer value);

  /**
   * Append one row to the ORC file.
   * @param row the row to append
   * @throws IOException
   */
  void addRow(Object row) throws IOException;

  /**
   * Raw data size of the most recently added row.
   */
  long getRowRawDataSize();

  /**
   * Flush every buffer and close the file. The writer must not be used once this
   * has been called.
   * @throws IOException
   */
  void close() throws IOException;
}
58 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/compression/CompressionCodec.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc.compression;
21 |
22 | import java.io.IOException;
23 | import java.nio.ByteBuffer;
24 |
25 | import org.apache.hadoop.conf.Configuration;
26 |
27 | public interface CompressionCodec {
28 | /**
29 | * Compress the in buffer to the out buffer.
30 | * @param in the bytes to compress
31 | * @param out the uncompressed bytes
32 | * @param overflow put any additional bytes here
33 | * @return true if the output is smaller than input
34 | * @throws IOException
35 | */
36 | boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
37 | ) throws IOException;
38 |
39 | /**
40 | * Decompress the in buffer to the out buffer.
41 | * @param in the bytes to decompress
42 | * @param out the decompressed bytes
43 | * @throws IOException
44 | */
45 | void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
46 |
47 | void reloadConfigurations(Configuration conf);
48 | }
49 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/compression/CompressionKind.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.compression;
22 |
23 | /**
24 | * An enumeration that lists the generic compression algorithms that
25 | * can be applied to ORC files.
26 | */
public enum CompressionKind {
  /** No compression. */
  NONE,
  /** Zlib compression. */
  ZLIB,
  /** Snappy compression. */
  SNAPPY,
  /** LZO compression. */
  LZO
}
30 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/compression/SnappyCodec.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.compression;
22 |
23 | import java.io.IOException;
24 | import java.nio.ByteBuffer;
25 |
26 | import com.facebook.hive.orc.compression.CompressionCodec;
27 | import org.apache.hadoop.conf.Configuration;
28 | import org.iq80.snappy.Snappy;
29 |
30 | public class SnappyCodec implements CompressionCodec {
31 |
32 | @Override
33 | public void reloadConfigurations(Configuration conf) {
34 | }
35 |
36 | @Override
37 | public boolean compress(ByteBuffer in, ByteBuffer out,
38 | ByteBuffer overflow) throws IOException {
39 | int inBytes = in.remaining();
40 | // I should work on a patch for Snappy to support an overflow buffer
41 | // to prevent the extra buffer copy.
42 | byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)];
43 | int outBytes =
44 | Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes,
45 | compressed, 0);
46 | if (outBytes < inBytes) {
47 | int remaining = out.remaining();
48 | if (remaining >= outBytes) {
49 | System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
50 | out.position(), outBytes);
51 | out.position(out.position() + outBytes);
52 | } else {
53 | System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
54 | out.position(), remaining);
55 | out.position(out.limit());
56 | System.arraycopy(compressed, remaining, overflow.array(),
57 | overflow.arrayOffset(), outBytes - remaining);
58 | overflow.position(outBytes - remaining);
59 | }
60 | return true;
61 | } else {
62 | return false;
63 | }
64 | }
65 |
66 | @Override
67 | public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
68 | int inOffset = in.position();
69 | int uncompressLen =
70 | Snappy.uncompress(in.array(), in.arrayOffset() + inOffset,
71 | in.limit() - inOffset, out.array(), out.arrayOffset() + out.position());
72 | out.position(uncompressLen + out.position());
73 | out.flip();
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/compression/ZlibCodec.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc.compression;
21 |
22 | import java.io.IOException;
23 | import java.nio.ByteBuffer;
24 | import java.util.zip.DataFormatException;
25 | import java.util.zip.Deflater;
26 | import java.util.zip.Inflater;
27 |
28 | import com.facebook.hive.orc.OrcConf;
29 | import com.facebook.hive.orc.compression.CompressionCodec;
30 | import org.apache.hadoop.conf.Configuration;
31 |
32 | public class ZlibCodec implements CompressionCodec {
33 |
34 | private int compressionLevel;
35 |
36 | public ZlibCodec() {
37 | compressionLevel = Deflater.DEFAULT_COMPRESSION;
38 | }
39 |
40 | public ZlibCodec(Configuration conf) {
41 | if (conf == null) {
42 | compressionLevel = Deflater.DEFAULT_COMPRESSION;
43 | } else {
44 | compressionLevel = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL);
45 | }
46 | }
47 |
48 | public void reloadConfigurations(Configuration conf) {
49 | compressionLevel = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL);
50 | }
51 |
52 | @Override
53 | public boolean compress(ByteBuffer in, ByteBuffer out,
54 | ByteBuffer overflow) throws IOException {
55 | Deflater deflater = new Deflater(compressionLevel, true);
56 | int length = in.remaining();
57 | deflater.setInput(in.array(), in.arrayOffset() + in.position(), length);
58 | deflater.finish();
59 | int outSize = 0;
60 | int offset = out.arrayOffset() + out.position();
61 | while (!deflater.finished() && (length > outSize)) {
62 | int size = deflater.deflate(out.array(), offset, out.remaining());
63 | out.position(size + out.position());
64 | outSize += size;
65 | offset += size;
66 | // if we run out of space in the out buffer, use the overflow
67 | if (out.remaining() == 0) {
68 | if (overflow == null) {
69 | deflater.end();
70 | return false;
71 | }
72 | out = overflow;
73 | offset = out.arrayOffset() + out.position();
74 | }
75 | }
76 | deflater.end();
77 | return length > outSize;
78 | }
79 |
80 | @Override
81 | public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
82 | Inflater inflater = new Inflater(true);
83 | inflater.setInput(in.array(), in.arrayOffset() + in.position(),
84 | in.remaining());
85 | while (!(inflater.finished() || inflater.needsDictionary() ||
86 | inflater.needsInput())) {
87 | try {
88 | int count = inflater.inflate(out.array(),
89 | out.arrayOffset() + out.position(),
90 | out.remaining());
91 | out.position(count + out.position());
92 | } catch (DataFormatException dfe) {
93 | throw new IOException("Bad compression data", dfe);
94 | }
95 | }
96 | out.flip();
97 | inflater.end();
98 | in.position(in.limit());
99 | }
100 |
101 | }
102 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyBinaryTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.EOFException;
24 | import java.io.IOException;
25 | import java.util.List;
26 | import java.util.Map;
27 |
28 | import org.apache.hadoop.io.BytesWritable;
29 |
30 | import com.facebook.hive.orc.InStream;
31 | import com.facebook.hive.orc.OrcProto;
32 | import com.facebook.hive.orc.RunLengthIntegerReader;
33 | import com.facebook.hive.orc.StreamName;
34 | import com.facebook.hive.orc.WriterImpl;
35 | import com.facebook.hive.orc.OrcProto.RowIndex;
36 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
37 |
38 | public class LazyBinaryTreeReader extends LazyTreeReader {
39 |
40 | private InStream stream;
41 | private RunLengthIntegerReader lengths;
42 |
43 | public LazyBinaryTreeReader(int columnId, long rowIndexStride) {
44 | super(columnId, rowIndexStride);
45 | }
46 |
47 | @Override
48 | public void startStripe(Map streams, List encodings,
49 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
50 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
51 | StreamName name = new StreamName(columnId,
52 | OrcProto.Stream.Kind.DATA);
53 | stream = streams.get(name);
54 | lengths = new RunLengthIntegerReader(streams.get(new
55 | StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
56 | false, WriterImpl.INT_BYTE_SIZE);
57 | if (indexes[columnId] != null) {
58 | loadIndeces(indexes[columnId].getEntryList(), 0);
59 | }
60 | }
61 |
62 | @Override
63 | public void seek(int index) throws IOException {
64 | stream.seek(index);
65 | lengths.seek(index);
66 | }
67 |
68 | @Override
69 | public int loadIndeces(List rowIndexEntries, int startIndex) {
70 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
71 | updatedStartIndex = stream.loadIndeces(rowIndexEntries, updatedStartIndex);
72 | return lengths.loadIndeces(rowIndexEntries, updatedStartIndex);
73 | }
74 |
75 | @Override
76 | public Object next(Object previous) throws IOException {
77 | BytesWritable result = null;
78 | if (valuePresent) {
79 | if (previous == null) {
80 | result = new BytesWritable();
81 | } else {
82 | result = (BytesWritable) previous;
83 | }
84 | int len = (int) lengths.next();
85 | result.setSize(len);
86 | int offset = 0;
87 | while (len > 0) {
88 | int written = stream.read(result.getBytes(), offset, len);
89 | if (written < 0) {
90 | throw new EOFException("Can't finish byte read from " + stream);
91 | }
92 | len -= written;
93 | offset += written;
94 | }
95 | }
96 | return result;
97 | }
98 |
99 | @Override
100 | public void skipRows(long numNonNullValues) throws IOException {
101 | long lengthToSkip = 0;
102 | for(int i=0; i < numNonNullValues; ++i) {
103 | lengthToSkip += lengths.next();
104 | }
105 | stream.skip(lengthToSkip);
106 | }
107 |
108 | @Override
109 | public void close() throws IOException {
110 | super.close();
111 | if (stream != null) {
112 | stream.close();
113 | }
114 | if (lengths != null) {
115 | lengths.close();
116 | }
117 | }
118 |
119 | }
120 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyBooleanTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import org.apache.hadoop.io.BooleanWritable;
28 |
29 | import com.facebook.hive.orc.BitFieldReader;
30 | import com.facebook.hive.orc.InStream;
31 | import com.facebook.hive.orc.OrcProto;
32 | import com.facebook.hive.orc.StreamName;
33 | import com.facebook.hive.orc.OrcProto.RowIndex;
34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
35 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
36 |
37 | public class LazyBooleanTreeReader extends LazyTreeReader {
38 |
39 | private BitFieldReader reader = null;
40 | private boolean latestRead = true; //< Latest value from stream.
41 |
42 | public LazyBooleanTreeReader(int columnId, long rowIndexStride) {
43 | super(columnId, rowIndexStride);
44 | }
45 |
46 | @Override
47 | public void startStripe(Map streams, List encodings,
48 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
49 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
50 | reader = new BitFieldReader(streams.get(new StreamName(columnId,
51 | OrcProto.Stream.Kind.DATA)));
52 | if (indexes[columnId] != null) {
53 | loadIndeces(indexes[columnId].getEntryList(), 0);
54 | }
55 | }
56 |
57 | @Override
58 | public void seek(int index) throws IOException {
59 | reader.seek(index);
60 | }
61 |
62 | @Override
63 | public int loadIndeces(List rowIndexEntries, int startIndex) {
64 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
65 | return reader.loadIndeces(rowIndexEntries, updatedStartIndex);
66 | }
67 |
68 | @Override
69 | public void skipRows(long numNonNullValues) throws IOException {
70 | reader.skip(numNonNullValues);
71 | }
72 |
73 | boolean readBoolean() throws IOException {
74 | latestRead = (reader.next() == 1);
75 | return latestRead;
76 | }
77 |
78 |
79 | BooleanWritable createWritable(Object previous, boolean v) throws IOException {
80 | BooleanWritable result = null;
81 | if (previous == null) {
82 | result = new BooleanWritable();
83 | } else {
84 | result = (BooleanWritable) previous;
85 | }
86 | result.set(v);
87 | return result;
88 | }
89 |
90 | @Override
91 | public Object createWritableFromLatest(Object previous) throws IOException {
92 | return createWritable(previous, latestRead);
93 | }
94 |
95 | @Override
96 | public boolean nextBoolean(boolean readStream) throws IOException {
97 | if (!readStream) {
98 | return latestRead;
99 | }
100 | if (!valuePresent) {
101 | throw new ValueNotPresentException("Cannot materialize boolean.");
102 | }
103 | return readBoolean();
104 |
105 | }
106 |
107 | @Override
108 | public Object next(Object previous) throws IOException {
109 | BooleanWritable result = null;
110 | if (valuePresent) {
111 | result = createWritable(previous, readBoolean());
112 | }
113 | return result;
114 | }
115 |
116 | @Override
117 | public void close() throws IOException {
118 | super.close();
119 | if (reader != null) {
120 | reader.close();
121 | }
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyByteTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import org.apache.hadoop.hive.serde2.io.ByteWritable;
28 |
29 | import com.facebook.hive.orc.InStream;
30 | import com.facebook.hive.orc.OrcProto;
31 | import com.facebook.hive.orc.RunLengthByteReader;
32 | import com.facebook.hive.orc.StreamName;
33 | import com.facebook.hive.orc.OrcProto.RowIndex;
34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
35 |
36 | public class LazyByteTreeReader extends LazyTreeReader {
37 |
38 | private RunLengthByteReader reader = null;
39 |
40 | public LazyByteTreeReader(int columnId, long rowIndexStride) {
41 | super(columnId, rowIndexStride);
42 | }
43 |
44 | @Override
45 | public void startStripe(Map streams, List encodings,
46 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
47 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
48 | reader = new RunLengthByteReader(streams.get(new StreamName(columnId,
49 | OrcProto.Stream.Kind.DATA)));
50 | if (indexes[columnId] != null) {
51 | loadIndeces(indexes[columnId].getEntryList(), 0);
52 | }
53 | }
54 |
55 | @Override
56 | public void seek(int index) throws IOException {
57 | reader.seek(index);
58 | }
59 |
60 | @Override
61 | public int loadIndeces(List rowIndexEntries, int startIndex) {
62 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
63 | return reader.loadIndeces(rowIndexEntries, updatedStartIndex);
64 | }
65 |
66 | @Override
67 | public Object next(Object previous) throws IOException {
68 | ByteWritable result = null;
69 | if (valuePresent) {
70 | if (previous == null) {
71 | result = new ByteWritable();
72 | } else {
73 | result = (ByteWritable) previous;
74 | }
75 | result.set(reader.next());
76 | }
77 | return result;
78 | }
79 |
80 | @Override
81 | public void skipRows(long numNonNullValues) throws IOException {
82 | reader.skip(numNonNullValues);
83 | }
84 |
85 | @Override
86 | public void close() throws IOException {
87 | super.close();
88 | if (reader != null) {
89 | reader.close();
90 | }
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyFloatTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import org.apache.hadoop.io.FloatWritable;
28 |
29 | import com.facebook.hive.orc.InStream;
30 | import com.facebook.hive.orc.OrcProto;
31 | import com.facebook.hive.orc.SerializationUtils;
32 | import com.facebook.hive.orc.StreamName;
33 | import com.facebook.hive.orc.OrcProto.RowIndex;
34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
35 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
36 |
37 | public class LazyFloatTreeReader extends LazyTreeReader {
38 |
39 | private InStream stream;
40 | private float latestRead = 0; //< Last float that was read from stream.
41 |
42 | public LazyFloatTreeReader(int columnId, long rowIndexStride) {
43 | super(columnId, rowIndexStride);
44 | }
45 |
46 | @Override
47 | public void startStripe(Map streams, List encodings,
48 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
49 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
50 | StreamName name = new StreamName(columnId,
51 | OrcProto.Stream.Kind.DATA);
52 | stream = streams.get(name);
53 | if (indexes[columnId] != null) {
54 | loadIndeces(indexes[columnId].getEntryList(), 0);
55 | }
56 | }
57 |
58 | @Override
59 | public void seek(int index) throws IOException {
60 | stream.seek(index);
61 | }
62 |
63 | @Override
64 | public int loadIndeces(List rowIndexEntries, int startIndex) {
65 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
66 | return stream.loadIndeces(rowIndexEntries, updatedStartIndex);
67 | }
68 |
69 | /**
70 | * Read a float value from the stream.
71 | */
72 | private float readFloat() throws IOException {
73 | latestRead = SerializationUtils.readFloat(stream);
74 | return latestRead;
75 | }
76 |
77 |
78 | FloatWritable createWritable(Object previous, float value) throws IOException {
79 | FloatWritable result = null;
80 | if (previous == null) {
81 | result = new FloatWritable();
82 | } else {
83 | result = (FloatWritable) previous;
84 | }
85 | result.set(value);
86 | return result;
87 | }
88 |
89 | @Override
90 | public Object createWritableFromLatest(Object previous) throws IOException {
91 | return createWritable(previous, latestRead);
92 | }
93 |
94 | /**
95 | * Give the next float as a primitive.
96 | */
97 | @Override
98 | public float nextFloat(boolean readStream) throws IOException, ValueNotPresentException {
99 | if (!readStream) {
100 | return latestRead;
101 | }
102 | if (!valuePresent) {
103 | throw new ValueNotPresentException("Cannot materialize float..");
104 | }
105 | return readFloat();
106 | }
107 |
108 | @Override
109 | public Object next(Object previous) throws IOException {
110 | FloatWritable result = null;
111 | if (valuePresent) {
112 | result = createWritable(previous, readFloat());
113 | }
114 | return result;
115 | }
116 |
117 | @Override
118 | public void skipRows(long numNonNullValues) throws IOException {
119 | for(int i=0; i < numNonNullValues; ++i) {
120 | SerializationUtils.readFloat(stream);
121 | }
122 | }
123 |
124 | @Override
125 | public void close() throws IOException {
126 | super.close();
127 | if (stream != null) {
128 | stream.close();
129 | }
130 | }
131 | }
132 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyIntDictionaryTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 |
25 | import org.apache.hadoop.io.IntWritable;
26 |
27 | import com.facebook.hive.orc.WriterImpl;
28 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
29 |
30 | class LazyIntDictionaryTreeReader extends LazyNumericDictionaryTreeReader {
31 | LazyIntDictionaryTreeReader (int columnId, long rowIndexStride) {
32 | super(columnId, rowIndexStride);
33 | }
34 |
35 | @Override
36 | protected int getNumBytes() {
37 | return WriterImpl.INT_BYTE_SIZE;
38 | }
39 |
40 | private int latestValue; //< Latest key that was read from reader.
41 |
42 | /**
43 | * Read an int value from the stream.
44 | */
45 | private int readInt() throws IOException {
46 | return latestValue = (int) readPrimitive();
47 | }
48 |
49 | private int latestValue() {
50 | return latestValue;
51 | }
52 |
53 | IntWritable createWritable(Object previous, int v) throws IOException {
54 | IntWritable result = null;
55 | if (previous == null) {
56 | result = new IntWritable();
57 | } else {
58 | result = (IntWritable) previous;
59 | }
60 | result.set(v);
61 | return result;
62 | }
63 |
64 | @Override
65 | public Object createWritableFromLatest(Object previous) throws IOException {
66 | return createWritable(previous, latestValue());
67 | }
68 |
69 | /**
70 | * Give the next int as a primitive
71 | */
72 | @Override
73 | public int nextInt(boolean readStream) throws IOException {
74 | if (!readStream) {
75 | return latestValue();
76 | }
77 | if (!valuePresent) {
78 | throw new ValueNotPresentException("Cannot materialize int.");
79 | }
80 | return readInt();
81 | }
82 |
83 |
84 | @Override
85 | public Object next(Object previous) throws IOException {
86 | IntWritable result = null;
87 | if (valuePresent) {
88 | result = createWritable(previous, readInt());
89 | }
90 | return result;
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyIntDirectTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 |
25 | import org.apache.hadoop.io.IntWritable;
26 |
27 | import com.facebook.hive.orc.SerializationUtils;
28 | import com.facebook.hive.orc.WriterImpl;
29 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
30 |
31 |
32 | class LazyIntDirectTreeReader extends LazyNumericDirectTreeReader {
33 | LazyIntDirectTreeReader(int columnId, long rowIndexStride) {
34 | super(columnId, rowIndexStride);
35 | }
36 |
37 | private int latestRead; //< Latest integer read from the stream.
38 |
39 | /**
40 | * Read an int value from the stream.
41 | */
42 | private int readInt() throws IOException {
43 | latestRead = (int)SerializationUtils.readIntegerType(input, WriterImpl.INT_BYTE_SIZE,
44 | true, input.useVInts());
45 | return latestRead;
46 | }
47 |
48 |
49 | IntWritable createWritable(Object previous, int v) throws IOException {
50 | IntWritable result = null;
51 | if (previous == null) {
52 | result = new IntWritable();
53 | } else {
54 | result = (IntWritable) previous;
55 | }
56 | result.set(v);
57 | return result;
58 | }
59 |
60 | @Override
61 | public Object createWritableFromLatest(Object previous) throws IOException {
62 | return createWritable(previous, latestRead);
63 | }
64 |
65 | /**
66 | * Give the next int as a primitive
67 | */
68 | @Override
69 | public int nextInt(boolean readStream) throws IOException {
70 | if (!readStream)
71 | return latestRead;
72 | if (!valuePresent)
73 | throw new ValueNotPresentException("Cannot materialize int.");
74 | return readInt();
75 | }
76 |
77 |
78 | @Override
79 | public Object next(Object previous) throws IOException {
80 | IntWritable result = null;
81 | if (valuePresent) {
82 | result = createWritable(previous, readInt());
83 | }
84 | return result;
85 | }
86 |
87 | @Override
88 | public void skipRows(long numNonNullValues) throws IOException {
89 | for (int i = 0; i < numNonNullValues; i++) {
90 | SerializationUtils.readIntegerType(input, WriterImpl.INT_BYTE_SIZE,
91 | true, input.useVInts());
92 | }
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyListTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.ArrayList;
25 | import java.util.List;
26 | import java.util.Map;
27 |
28 | import com.facebook.hive.orc.InStream;
29 | import com.facebook.hive.orc.OrcProto;
30 | import com.facebook.hive.orc.RunLengthIntegerReader;
31 | import com.facebook.hive.orc.StreamName;
32 | import com.facebook.hive.orc.WriterImpl;
33 | import com.facebook.hive.orc.OrcProto.RowIndex;
34 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
35 |
36 | public class LazyListTreeReader extends LazyTreeReader {
37 | private final LazyTreeReader elementReader;
38 | private RunLengthIntegerReader lengths;
39 |
40 | public LazyListTreeReader(int columnId, long rowIndexStride, LazyTreeReader elementReader) {
41 | super(columnId, rowIndexStride);
42 | this.elementReader = elementReader;
43 | }
44 |
45 | @Override
46 | public Object next(Object previous) throws IOException {
47 | List result = null;
48 | if (valuePresent) {
49 | final int length = nextLength();
50 | final List prevResult = previous == null ?
51 | null : (ArrayList) previous;
52 | final int prevLength = prevResult == null ? -1 : prevResult.size();
53 |
54 | // read the new elements into the array
55 | result = new ArrayList(length);
56 | for(int i=0; i < length; i++) {
57 | result.add(elementReader.getInComplexType(i < prevLength ?
58 | prevResult.get(i) : null, previousRow));
59 | }
60 | }
61 | return result;
62 | }
63 |
64 | @Override
65 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException {
66 | super.seek(rowIndexEntry, backwards);
67 | elementReader.seek(rowIndexEntry, backwards);
68 | }
69 |
70 | @Override
71 | public void startStripe(Map streams,
72 | List encodings,
73 | RowIndex[] indexes,
74 | long rowBaseInStripe
75 | ) throws IOException {
76 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
77 | elementReader.startStripe(streams, encodings, indexes, rowBaseInStripe);
78 | lengths = new RunLengthIntegerReader(streams.get(new StreamName(columnId,
79 | OrcProto.Stream.Kind.LENGTH)), false, WriterImpl.INT_BYTE_SIZE);
80 | if (indexes[columnId] != null) {
81 | loadIndeces(indexes[columnId].getEntryList(), 0);
82 | }
83 | }
84 |
85 | @Override
86 | public void seek(int index) throws IOException {
87 | lengths.seek(index);
88 | }
89 |
90 | @Override
91 | public int loadIndeces(List rowIndexEntries, int startIndex) {
92 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
93 | return lengths.loadIndeces(rowIndexEntries, updatedStartIndex);
94 | }
95 |
96 | public int nextLength() throws IOException {
97 | return (int) lengths.next();
98 | }
99 |
100 | @Override
101 | public void skipRows(long numNonNullValues) throws IOException {
102 | long childSkip = 0;
103 | for(long i=0; i < numNonNullValues; ++i) {
104 | childSkip += lengths.next();
105 | }
106 | elementReader.skipRowsInComplexType(childSkip);
107 | }
108 |
109 | @Override
110 | public void close() throws IOException {
111 | super.close();
112 | if (elementReader != null) {
113 | elementReader.close();
114 | }
115 | if (lengths != null) {
116 | lengths.close();
117 | }
118 | }
119 | }
120 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyLongDictionaryTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 |
25 | import org.apache.hadoop.io.LongWritable;
26 |
27 | import com.facebook.hive.orc.WriterImpl;
28 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
29 |
30 | class LazyLongDictionaryTreeReader extends LazyNumericDictionaryTreeReader {
31 |
32 | private long latestValue = 0; //< Latest index read from reader
33 |
34 | LazyLongDictionaryTreeReader (int columnId, long rowIndexStride) {
35 | super(columnId, rowIndexStride);
36 | }
37 |
38 | @Override
39 | protected int getNumBytes() {
40 | return WriterImpl.LONG_BYTE_SIZE;
41 | }
42 |
43 | private long readLong() throws IOException {
44 | return latestValue = readPrimitive();
45 | }
46 |
47 | private long latestValue() {
48 | return latestValue;
49 | }
50 |
51 | private LongWritable createWritable(Object previous, long v) throws IOException {
52 | LongWritable result = null;
53 | if (previous == null) {
54 | result = new LongWritable();
55 | } else {
56 | result = (LongWritable) previous;
57 | }
58 | result.set(v);
59 | return result;
60 | }
61 |
62 | @Override
63 | public Object createWritableFromLatest(Object previous) throws IOException {
64 | return createWritable(previous, latestValue());
65 | }
66 |
67 | @Override
68 | public long nextLong(boolean readStream) throws IOException {
69 | if (!readStream) {
70 | return latestValue();
71 | }
72 | if (!valuePresent) {
73 | throw new ValueNotPresentException("Cannot materialize long.");
74 | }
75 | return readLong();
76 | }
77 |
78 | @Override
79 | public Object next(Object previous) throws IOException {
80 | LongWritable result = null;
81 | if (valuePresent) {
82 | result = createWritable(previous, readLong());
83 | }
84 | return result;
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyLongDirectTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 |
25 | import org.apache.hadoop.io.LongWritable;
26 |
27 | import com.facebook.hive.orc.SerializationUtils;
28 | import com.facebook.hive.orc.WriterImpl;
29 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
30 |
31 |
32 | class LazyLongDirectTreeReader extends LazyNumericDirectTreeReader {
33 | private long latestRead = 0;
34 |
35 | LazyLongDirectTreeReader(int columnId, long rowIndexStride) {
36 | super(columnId, rowIndexStride);
37 | }
38 |
39 | private long readLong() throws IOException {
40 | latestRead = (long)SerializationUtils.readIntegerType(input, WriterImpl.LONG_BYTE_SIZE,
41 | true, input.useVInts());
42 | return latestRead;
43 | }
44 |
45 | private LongWritable createWritable(Object previous, long v) throws IOException {
46 | LongWritable result = null;
47 | if (previous == null) {
48 | result = new LongWritable();
49 | } else {
50 | result = (LongWritable) previous;
51 | }
52 | result.set(v);
53 | return result;
54 | }
55 |
56 | @Override
57 | public Object createWritableFromLatest(Object previous) throws IOException {
58 | return createWritable(previous, latestRead);
59 | }
60 |
61 | @Override
62 | public long nextLong(boolean readStream) throws IOException {
63 | if (!readStream)
64 | return latestRead;
65 | if (!valuePresent)
66 | throw new ValueNotPresentException("Cannot materialize long.");
67 | return readLong();
68 | }
69 |
70 | @Override
71 | public Object next(Object previous) throws IOException {
72 | LongWritable result = null;
73 | if (valuePresent) {
74 | result = createWritable(previous, readLong());
75 | }
76 | return result;
77 | }
78 |
79 | @Override
80 | public void skipRows(long numNonNullValues) throws IOException {
81 | for (int i = 0; i < numNonNullValues; i++) {
82 | SerializationUtils.readIntegerType(input, WriterImpl.LONG_BYTE_SIZE,
83 | true, input.useVInts());
84 | }
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyLongTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import com.facebook.hive.orc.InStream;
28 | import com.facebook.hive.orc.OrcProto;
29 | import com.facebook.hive.orc.StreamName;
30 | import com.facebook.hive.orc.OrcProto.RowIndex;
31 |
32 | public class LazyLongTreeReader extends LazyIntTreeReader {
33 |
34 | public LazyLongTreeReader(int columnId, long rowIndexStride) {
35 | super(columnId, rowIndexStride);
36 | }
37 |
38 | @Override
39 | public void startStripe(Map streams, List encodings,
40 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
41 | switch (encodings.get(columnId).getKind()) {
42 | case DICTIONARY:
43 | reader = new LazyLongDictionaryTreeReader(columnId, rowIndexStride);
44 | break;
45 | case DIRECT:
46 | reader = new LazyLongDirectTreeReader(columnId, rowIndexStride);
47 | break;
48 | default:
49 | throw new IllegalArgumentException("Unsupported encoding " +
50 | encodings.get(columnId).getKind());
51 | }
52 | reader.startStripe(streams, encodings, indexes, rowBaseInStripe);
53 | if (indexes[columnId] != null) {
54 | loadIndeces(indexes[columnId].getEntryList(), 0);
55 | }
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyNumericDirectTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import com.facebook.hive.orc.InStream;
28 | import com.facebook.hive.orc.OrcProto;
29 | import com.facebook.hive.orc.StreamName;
30 | import com.facebook.hive.orc.OrcProto.RowIndex;
31 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
32 |
33 | abstract class LazyNumericDirectTreeReader extends LazyTreeReader {
34 | protected InStream input;
35 |
36 | LazyNumericDirectTreeReader(int columnId, long rowIndexStride) {
37 | super(columnId, rowIndexStride);
38 | }
39 |
40 | @Override
41 | public void startStripe(Map streams, List encodings,
42 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
43 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
44 | StreamName name = new StreamName(columnId,
45 | OrcProto.Stream.Kind.DATA);
46 | input = streams.get(name);
47 | if (indexes[columnId] != null) {
48 | loadIndeces(indexes[columnId].getEntryList(), 0);
49 | }
50 | }
51 |
52 | @Override
53 | public void seek(int index) throws IOException {
54 | input.seek(index);
55 | }
56 |
57 | @Override
58 | public int loadIndeces(List rowIndexEntries, int startIndex) {
59 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
60 | return input.loadIndeces(rowIndexEntries, updatedStartIndex);
61 | }
62 |
63 | @Override
64 | public void close() throws IOException {
65 | super.close();
66 | if (input != null) {
67 | input.close();
68 | }
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyShortDictionaryTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 |
25 | import org.apache.hadoop.hive.serde2.io.ShortWritable;
26 |
27 | import com.facebook.hive.orc.WriterImpl;
28 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
29 |
30 | class LazyShortDictionaryTreeReader extends LazyNumericDictionaryTreeReader {
31 | LazyShortDictionaryTreeReader (int columnId, long rowIndexStride) {
32 | super(columnId, rowIndexStride);
33 | }
34 |
35 | @Override
36 | protected int getNumBytes() {
37 | return WriterImpl.SHORT_BYTE_SIZE;
38 | }
39 |
40 | private short latestValue; //< Latest key that was read from reader.
41 |
42 | /**
43 | * Read an short value from the stream.
44 | */
45 | private short readShort() throws IOException {
46 | return latestValue = (short) readPrimitive();
47 | }
48 |
49 | private short latestValue() {
50 | return latestValue;
51 | }
52 |
53 | ShortWritable createWritable(Object previous, short v) throws IOException {
54 | ShortWritable result = null;
55 | if (previous == null) {
56 | result = new ShortWritable();
57 | } else {
58 | result = (ShortWritable) previous;
59 | }
60 | result.set(v);
61 | return result;
62 | }
63 |
64 | @Override
65 | public Object createWritableFromLatest(Object previous) throws IOException {
66 | return createWritable(previous, latestValue());
67 | }
68 |
69 | /**
70 | * Give the next short as a primitive
71 | */
72 | @Override
73 | public short nextShort(boolean readStream) throws IOException {
74 | if (!readStream) {
75 | return latestValue();
76 | }
77 | if (!valuePresent) {
78 | throw new ValueNotPresentException("Cannot materialize short.");
79 | }
80 | return readShort();
81 | }
82 |
83 | @Override
84 | public Object next(Object previous) throws IOException {
85 | ShortWritable result = null;
86 | if (valuePresent) {
87 | result = createWritable(previous, readShort());
88 | }
89 | return result;
90 | }
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyShortDirectTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 |
25 | import org.apache.hadoop.hive.serde2.io.ShortWritable;
26 |
27 | import com.facebook.hive.orc.SerializationUtils;
28 | import com.facebook.hive.orc.WriterImpl;
29 | import com.facebook.hive.orc.lazy.OrcLazyObject.ValueNotPresentException;
30 |
31 | class LazyShortDirectTreeReader extends LazyNumericDirectTreeReader {
32 | LazyShortDirectTreeReader(int columnId, long rowIndexStride) {
33 | super(columnId, rowIndexStride);
34 | }
35 |
36 | private short latestRead = 0; //< Latest integer read from the stream.
37 |
38 | /**
39 | * Read a short value from the stream.
40 | */
41 | private short readShort() throws IOException {
42 | latestRead =
43 | (short)SerializationUtils.readIntegerType(input, WriterImpl.SHORT_BYTE_SIZE,
44 | true, input.useVInts());
45 | return latestRead;
46 | }
47 |
48 |
49 | ShortWritable createWritable(Object previous, short v) throws IOException {
50 | ShortWritable result = null;
51 | if (previous == null) {
52 | result = new ShortWritable();
53 | } else {
54 | result = (ShortWritable) previous;
55 | }
56 | result.set(v);
57 | return result;
58 | }
59 |
60 | @Override
61 | public Object createWritableFromLatest(Object previous) throws IOException {
62 | return createWritable(previous, latestRead);
63 | }
64 |
65 | /**
66 | * Give the next short as a primitive
67 | */
68 | @Override
69 | public short nextShort(boolean readStream) throws IOException {
70 | if (!readStream)
71 | return latestRead;
72 | if (!valuePresent)
73 | throw new ValueNotPresentException("Cannot materialize short.");
74 | return readShort();
75 | }
76 |
77 | @Override
78 | public Object next(Object previous) throws IOException {
79 | ShortWritable result = null;
80 | if (valuePresent) {
81 | result = createWritable(previous, readShort());
82 | }
83 | return result;
84 | }
85 |
86 | @Override
87 | public void skipRows(long numNonNullValues) throws IOException {
88 | for (int i = 0; i < numNonNullValues; i++) {
89 | SerializationUtils.readIntegerType(input, WriterImpl.SHORT_BYTE_SIZE,
90 | true, input.useVInts());
91 | }
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyShortTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import com.facebook.hive.orc.InStream;
28 | import com.facebook.hive.orc.OrcProto;
29 | import com.facebook.hive.orc.StreamName;
30 | import com.facebook.hive.orc.OrcProto.RowIndex;
31 |
32 | public class LazyShortTreeReader extends LazyIntTreeReader {
33 |
34 | public LazyShortTreeReader(int columnId, long rowIndexStride) {
35 | super(columnId, rowIndexStride);
36 | }
37 |
38 | @Override
39 | public void startStripe(Map streams, List encodings,
40 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
41 | switch (encodings.get(columnId).getKind()) {
42 | case DICTIONARY:
43 | reader = new LazyShortDictionaryTreeReader(columnId, rowIndexStride);
44 | break;
45 | case DIRECT:
46 | reader = new LazyShortDirectTreeReader(columnId, rowIndexStride);
47 | break;
48 | default:
49 | throw new IllegalArgumentException("Unsupported encoding " +
50 | encodings.get(columnId).getKind());
51 | }
52 | reader.startStripe(streams, encodings, indexes, rowBaseInStripe);
53 | }
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyStringDirectTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.EOFException;
24 | import java.io.IOException;
25 | import java.util.List;
26 | import java.util.Map;
27 |
28 | import org.apache.hadoop.io.Text;
29 |
30 | import com.facebook.hive.orc.InStream;
31 | import com.facebook.hive.orc.OrcProto;
32 | import com.facebook.hive.orc.RunLengthIntegerReader;
33 | import com.facebook.hive.orc.StreamName;
34 | import com.facebook.hive.orc.WriterImpl;
35 | import com.facebook.hive.orc.OrcProto.RowIndex;
36 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
37 |
38 | class LazyStringDirectTreeReader extends LazyTreeReader {
39 | private InStream stream;
40 | private RunLengthIntegerReader lengths;
41 |
42 | LazyStringDirectTreeReader(int columnId, long rowIndexStride) {
43 | super(columnId, rowIndexStride);
44 | }
45 |
46 | @Override
47 | public void startStripe(Map streams, List encodings,
48 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
49 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
50 | StreamName name = new StreamName(columnId,
51 | OrcProto.Stream.Kind.DATA);
52 | stream = streams.get(name);
53 | lengths = new RunLengthIntegerReader(streams.get(new
54 | StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
55 | false, WriterImpl.INT_BYTE_SIZE);
56 | if (indexes[columnId] != null) {
57 | loadIndeces(indexes[columnId].getEntryList(), 0);
58 | }
59 | }
60 |
61 | @Override
62 | public void seek(int index) throws IOException {
63 | stream.seek(index);
64 | lengths.seek(index);
65 | }
66 |
67 | @Override
68 | public int loadIndeces(List rowIndexEntries, int startIndex) {
69 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
70 | updatedStartIndex = stream.loadIndeces(rowIndexEntries, updatedStartIndex);
71 | return lengths.loadIndeces(rowIndexEntries, updatedStartIndex);
72 | }
73 |
74 | @Override
75 | public Object next(Object previous) throws IOException {
76 | Text result = null;
77 | if (valuePresent) {
78 | if (previous == null) {
79 | result = new Text();
80 | } else {
81 | result = (Text) previous;
82 | }
83 | int len = (int) lengths.next();
84 | int offset = 0;
85 | byte[] bytes = new byte[len];
86 | while (len > 0) {
87 | int written = stream.read(bytes, offset, len);
88 | if (written < 0) {
89 | throw new EOFException("Can't finish byte read from " + stream);
90 | }
91 | len -= written;
92 | offset += written;
93 | }
94 | result.set(bytes);
95 | }
96 | return result;
97 | }
98 |
99 | @Override
100 | public void skipRows(long numNonNullValues) throws IOException {
101 | long lengthToSkip = 0;
102 | for(int i=0; i < numNonNullValues; ++i) {
103 | lengthToSkip += lengths.next();
104 | }
105 | stream.skip(lengthToSkip);
106 | }
107 |
108 | @Override
109 | public void close() throws IOException {
110 | super.close();
111 | if (stream != null) {
112 | stream.close();
113 | }
114 | if (lengths != null) {
115 | lengths.close();
116 | }
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyStringTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import com.facebook.hive.orc.InStream;
28 | import com.facebook.hive.orc.OrcProto;
29 | import com.facebook.hive.orc.StreamName;
30 | import com.facebook.hive.orc.OrcProto.RowIndex;
31 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
32 |
33 | public class LazyStringTreeReader extends LazyTreeReader {
34 |
35 | private LazyTreeReader reader;
36 |
37 | public LazyStringTreeReader(int columnId, long rowIndexStride) {
38 | super(columnId, rowIndexStride);
39 | }
40 |
41 | @Override
42 | public void startStripe(Map streams, List encodings,
43 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
44 | // For each stripe, checks the encoding and initializes the appropriate reader
45 | switch (encodings.get(columnId).getKind()) {
46 | case DIRECT:
47 | reader = new LazyStringDirectTreeReader(columnId, rowIndexStride);
48 | break;
49 | case DICTIONARY:
50 | reader = new LazyStringDictionaryTreeReader(columnId, rowIndexStride);
51 | break;
52 | default:
53 | throw new IllegalArgumentException("Unsupported encoding " +
54 | encodings.get(columnId).getKind());
55 | }
56 |
57 | reader.startStripe(streams, encodings, indexes, rowBaseInStripe);
58 | }
59 |
60 | @Override
61 | public void seek(int index) throws IOException {
62 | reader.seek(index);
63 | }
64 |
65 | @Override
66 | public int loadIndeces(List rowIndexEntries, int startIndex) {
67 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
68 | return reader.loadIndeces(rowIndexEntries, updatedStartIndex);
69 | }
70 |
71 | @Override
72 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException {
73 | reader.seek(rowIndexEntry, backwards);
74 | }
75 |
76 | @Override
77 | public Object next(Object previous) throws IOException {
78 | return reader.next(previous);
79 | }
80 |
81 | @Override
82 | public void skipRowsInComplexType(long numRows) throws IOException {
83 | reader.skipRowsInComplexType(numRows);
84 | }
85 |
86 | @Override
87 | public void skipRows(long numNonNullValues) throws IOException {
88 | reader.skipRows(numNonNullValues);
89 | }
90 |
91 | @Override
92 | public boolean nextIsNull(long currentRow) throws IOException {
93 | return reader.nextIsNull(currentRow);
94 | }
95 |
96 | @Override
97 | public boolean nextIsNullInComplexType() throws IOException {
98 | return reader.nextIsNullInComplexType();
99 | }
100 |
101 | @Override
102 | public Object getInComplexType(Object previous, long row) throws IOException {
103 | return reader.getInComplexType(previous, row);
104 | }
105 |
106 | @Override
107 | public Object get(long currentRow, Object previous) throws IOException {
108 | return reader.get(currentRow, previous);
109 | }
110 |
111 | @Override
112 | public void close() throws IOException {
113 | super.close();
114 | if (reader != null) {
115 | reader.close();
116 | }
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyStructTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import com.facebook.hive.orc.InStream;
28 | import com.facebook.hive.orc.OrcProto;
29 | import com.facebook.hive.orc.OrcStruct;
30 | import com.facebook.hive.orc.StreamName;
31 | import com.facebook.hive.orc.OrcProto.RowIndex;
32 |
33 | public class LazyStructTreeReader extends LazyTreeReader {
34 |
35 | private final LazyTreeReader[] fields;
36 | private final List fieldNames;
37 |
38 | public LazyStructTreeReader(int columnId, long rowIndexStride, LazyTreeReader[] fields,
39 | List fieldNames) throws IOException {
40 | super(columnId, rowIndexStride);
41 | this.fields = fields;
42 | this.fieldNames = fieldNames;
43 | }
44 |
45 | @Override
46 | public void skipRows(long numNonNullValues) throws IOException {
47 | for(LazyTreeReader field: fields) {
48 | if (field != null) {
49 | field.skipRowsInComplexType(numNonNullValues);
50 | }
51 | }
52 | }
53 |
54 | @Override
55 | public Object next(Object previous) throws IOException {
56 | OrcStruct result = null;
57 | if (valuePresent) {
58 | if (previous == null) {
59 | result = new OrcStruct(fieldNames);
60 | } else {
61 | result = (OrcStruct) previous;
62 |
63 | // If the input format was initialized with a file with a
64 | // different number of fields, the number of fields needs to
65 | // be updated to the correct number
66 | result.setFieldNames(fieldNames);
67 | }
68 | for(int i=0; i < fields.length; ++i) {
69 | if (fields[i] != null) {
70 | result.setFieldValue(i,
71 | fields[i].getInComplexType(result.getFieldValue(i), previousRow));
72 | }
73 | }
74 | }
75 | return result;
76 | }
77 |
78 | @Override
79 | public void startStripe(Map streams,
80 | List encodings,
81 | RowIndex[] indexes,
82 | long rowBaseInStripe
83 | ) throws IOException {
84 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
85 | for (int i = 0; i < fields.length; i++) {
86 | if (fields[i] != null) {
87 | fields[i].startStripe(streams, encodings, indexes, rowBaseInStripe);
88 | }
89 | }
90 | if (indexes[columnId] != null) {
91 | loadIndeces(indexes[columnId].getEntryList(), 0);
92 | }
93 | }
94 |
95 | @Override
96 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException {
97 | super.seek(rowIndexEntry, backwards);
98 | for (LazyTreeReader field : fields) {
99 | if (field != null) {
100 | field.seek(rowIndexEntry, backwards);
101 | }
102 | }
103 | }
104 |
105 | @Override
106 | protected void seek(int index) throws IOException {
107 | // Most tree readers have streams besides the present stream, e.g. the data for a simple type
108 | // or the length of a complex type. The only data structs contain besides whether or not
109 | // they're null is the fields themselves, each of which has its own tree reader, so nothing
110 | // to do here.
111 | }
112 |
113 | @Override
114 | public void close() throws IOException {
115 | super.close();
116 | for (LazyTreeReader field : fields) {
117 | if (field != null) {
118 | field.close();
119 | }
120 | }
121 | }
122 | }
123 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/LazyUnionTreeReader.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import com.facebook.hive.orc.InStream;
28 | import com.facebook.hive.orc.OrcProto;
29 | import com.facebook.hive.orc.OrcUnion;
30 | import com.facebook.hive.orc.RunLengthByteReader;
31 | import com.facebook.hive.orc.StreamName;
32 | import com.facebook.hive.orc.OrcProto.RowIndex;
33 | import com.facebook.hive.orc.OrcProto.RowIndexEntry;
34 |
35 | public class LazyUnionTreeReader extends LazyTreeReader {
36 |
37 | private final LazyTreeReader[] fields;
38 | private RunLengthByteReader tags;
39 |
40 | public LazyUnionTreeReader(int columnId, long rowIndexStride, LazyTreeReader[] fields) {
41 | super(columnId, rowIndexStride);
42 | this.fields = fields;
43 | }
44 |
45 | @Override
46 | public Object next(Object previous) throws IOException {
47 | OrcUnion result = null;
48 | if (valuePresent) {
49 | if (previous == null) {
50 | result = new OrcUnion();
51 | } else {
52 | result = (OrcUnion) previous;
53 | }
54 | byte tag = nextTag();
55 | Object previousVal = result.getObject();
56 | result.set(tag, fields[tag].getInComplexType(tag == result.getTag() ?
57 | previousVal : null, previousRow));
58 | }
59 | return result;
60 | }
61 |
62 | @Override
63 | protected void seek(int rowIndexEntry, boolean backwards) throws IOException {
64 | super.seek(rowIndexEntry, backwards);
65 | for (LazyTreeReader field : fields) {
66 | field.seek(rowIndexEntry, backwards);
67 | }
68 | }
69 |
70 | @Override
71 | public void startStripe(Map streams, List encodings,
72 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
73 | super.startStripe(streams, encodings, indexes, rowBaseInStripe);
74 | for (int i = 0; i < fields.length; i++) {
75 | fields[i].startStripe(streams, encodings, indexes, rowBaseInStripe);
76 | }
77 | tags = new RunLengthByteReader(streams.get(new StreamName(columnId,
78 | OrcProto.Stream.Kind.DATA)));
79 | if (indexes[columnId] != null) {
80 | loadIndeces(indexes[columnId].getEntryList(), 0);
81 | }
82 | }
83 |
84 | @Override
85 | public void seek(int index) throws IOException {
86 | tags.seek(index);
87 | }
88 |
89 | @Override
90 | public int loadIndeces(List rowIndexEntries, int startIndex) {
91 | int updatedStartIndex = super.loadIndeces(rowIndexEntries, startIndex);
92 | return tags.loadIndeces(rowIndexEntries, updatedStartIndex);
93 | }
94 |
95 | public byte nextTag() throws IOException {
96 | return (byte) tags.next();
97 | }
98 |
99 | @Override
100 | public void skipRows(long numNonNullValues) throws IOException {
101 | long[] counts = new long[fields.length];
102 | for(int i=0; i < numNonNullValues; ++i) {
103 | counts[tags.next()] += 1;
104 | }
105 | for(int i=0; i < counts.length; ++i) {
106 | fields[i].skipRowsInComplexType(counts[i]);
107 | }
108 | }
109 |
110 | @Override
111 | public void close() throws IOException {
112 | super.close();
113 | for (LazyTreeReader field : fields) {
114 | field.close();
115 | }
116 | if (tags != null) {
117 | tags.close();
118 | }
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBinary.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.io.BytesWritable;
24 |
25 | public class OrcLazyBinary extends OrcLazyObject {
26 |
27 | public OrcLazyBinary(LazyBinaryTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyBinary(OrcLazyBinary copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | BytesWritable copyPrevious = (BytesWritable) copy.previous;
35 | byte[] bytes = new byte[copyPrevious.getLength()];
36 | System.arraycopy(copyPrevious.getBytes(), 0, bytes, 0, copyPrevious.getLength());
37 | previous = new BytesWritable(bytes);
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBinaryObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
28 | import org.apache.hadoop.io.BytesWritable;
29 |
30 | public class OrcLazyBinaryObjectInspector extends OrcLazyPrimitiveObjectInspector implements
31 | BinaryObjectInspector {
32 |
33 | protected OrcLazyBinaryObjectInspector() {
34 | super(PrimitiveObjectInspectorUtils.binaryTypeEntry);
35 | }
36 |
37 | @Override
38 | public byte[] getPrimitiveJavaObject(Object o) {
39 | BytesWritable writable = getPrimitiveWritableObject(o);
40 | return writable == null ? null : LazyUtils.createByteArray(writable);
41 | }
42 |
43 | @Override
44 | public Object copyObject(Object o) {
45 | return o == null ? null : new OrcLazyBinary((OrcLazyBinary) o);
46 | }
47 |
48 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
49 | public PrimitiveTypeInfo getTypeInfo() {
50 | return (PrimitiveTypeInfo) TypeInfoFactory.binaryTypeInfo;
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBoolean.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.io.BooleanWritable;
24 |
25 | public class OrcLazyBoolean extends OrcLazyObject {
26 | public OrcLazyBoolean(LazyBooleanTreeReader treeReader) {
27 | super(treeReader);
28 | }
29 |
30 | public OrcLazyBoolean(OrcLazyBoolean copy) {
31 | super(copy);
32 | if (copy.previous != null) {
33 | previous = new BooleanWritable(((BooleanWritable)copy.previous).get());
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyBooleanObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
27 | import org.apache.hadoop.io.BooleanWritable;
28 |
29 | public class OrcLazyBooleanObjectInspector extends OrcLazyPrimitiveObjectInspector implements
30 | BooleanObjectInspector {
31 |
32 | protected OrcLazyBooleanObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.booleanTypeEntry);
34 | }
35 |
36 | @Override
37 | public boolean get(Object o) {
38 | return ((BooleanWritable)getPrimitiveWritableObject(o)).get();
39 | }
40 |
41 | @Override
42 | public Object copyObject(Object o) {
43 | return o == null ? null : new OrcLazyBoolean((OrcLazyBoolean) o);
44 | }
45 |
46 | @Override
47 | public Object getPrimitiveJavaObject(Object o) {
48 | BooleanWritable writable = getPrimitiveWritableObject(o);
49 | return writable == null ? null : Boolean.valueOf(writable.get());
50 | }
51 |
52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
53 | public PrimitiveTypeInfo getTypeInfo() {
54 | return (PrimitiveTypeInfo) TypeInfoFactory.booleanTypeInfo;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyByte.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.io.ByteWritable;
24 |
25 | public class OrcLazyByte extends OrcLazyObject {
26 | public OrcLazyByte(LazyByteTreeReader treeReader) {
27 | super(treeReader);
28 | }
29 |
30 | public OrcLazyByte(OrcLazyByte copy) {
31 | super(copy);
32 | if (copy.previous != null) {
33 | previous = new ByteWritable(((ByteWritable)copy.previous).get());
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyByteObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.io.ByteWritable;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
28 |
29 | public class OrcLazyByteObjectInspector extends OrcLazyPrimitiveObjectInspector implements
30 | ByteObjectInspector {
31 |
32 | protected OrcLazyByteObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.byteTypeEntry);
34 | }
35 |
36 | @Override
37 | public byte get(Object o) {
38 | return ((ByteWritable)getPrimitiveWritableObject(o)).get();
39 | }
40 |
41 | @Override
42 | public Object copyObject(Object o) {
43 | return o == null ? null : new OrcLazyByte((OrcLazyByte) o);
44 | }
45 |
46 | @Override
47 | public Object getPrimitiveJavaObject(Object o) {
48 | ByteWritable writable = getPrimitiveWritableObject(o);
49 | return writable == null ? null : Byte.valueOf(writable.get());
50 | }
51 |
52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
53 | public PrimitiveTypeInfo getTypeInfo() {
54 | return (PrimitiveTypeInfo) TypeInfoFactory.byteTypeInfo;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyDouble.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.io.DoubleWritable;
24 |
25 | public class OrcLazyDouble extends OrcLazyObject {
26 |
27 | public OrcLazyDouble(LazyDoubleTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyDouble(OrcLazyDouble copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | previous = new DoubleWritable(((DoubleWritable)copy.previous).get());
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyDoubleObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.io.DoubleWritable;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
28 |
29 | public class OrcLazyDoubleObjectInspector extends OrcLazyPrimitiveObjectInspector
30 | implements DoubleObjectInspector {
31 |
32 | protected OrcLazyDoubleObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.doubleTypeEntry);
34 | }
35 |
36 | public double get(Object o) {
37 | return ((DoubleWritable)getPrimitiveWritableObject(o)).get();
38 | }
39 |
40 | @Override
41 | public Object copyObject(Object o) {
42 | return o == null ? null : new OrcLazyDouble((OrcLazyDouble) o);
43 | }
44 |
45 | @Override
46 | public Object getPrimitiveJavaObject(Object o) {
47 | DoubleWritable writable = getPrimitiveWritableObject(o);
48 | return writable == null ? null : Double.valueOf(writable.get());
49 | }
50 |
51 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
52 | public PrimitiveTypeInfo getTypeInfo() {
53 | return (PrimitiveTypeInfo) TypeInfoFactory.doubleTypeInfo;
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyFloat.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.io.FloatWritable;
24 |
25 | public class OrcLazyFloat extends OrcLazyObject {
26 |
27 | public OrcLazyFloat(LazyFloatTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyFloat(OrcLazyFloat copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | previous = new FloatWritable(((FloatWritable)copy.previous).get());
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyFloatObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
27 | import org.apache.hadoop.io.FloatWritable;
28 |
29 | public class OrcLazyFloatObjectInspector extends OrcLazyPrimitiveObjectInspector implements
30 | FloatObjectInspector {
31 |
32 | OrcLazyFloatObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.floatTypeEntry);
34 | }
35 |
36 | public float get(Object o) {
37 | return ((FloatWritable)getPrimitiveWritableObject(o)).get();
38 | }
39 |
40 | @Override
41 | public Object copyObject(Object o) {
42 | return o == null ? null : new OrcLazyFloat((OrcLazyFloat) o);
43 | }
44 |
45 | @Override
46 | public Object getPrimitiveJavaObject(Object o) {
47 | FloatWritable writable = getPrimitiveWritableObject(o);
48 | return writable == null ? null : Float.valueOf(writable.get());
49 | }
50 |
51 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
52 | public PrimitiveTypeInfo getTypeInfo() {
53 | return (PrimitiveTypeInfo) TypeInfoFactory.floatTypeInfo;
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyInt.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.io.IntWritable;
24 |
25 | public class OrcLazyInt extends OrcLazyObject {
26 |
27 | public OrcLazyInt(LazyIntTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyInt(OrcLazyInt copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | previous = new IntWritable(((IntWritable)copy.previous).get());
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyIntObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
27 | import org.apache.hadoop.io.IntWritable;
28 |
29 | public class OrcLazyIntObjectInspector extends OrcLazyPrimitiveObjectInspector implements
30 | IntObjectInspector {
31 |
32 | protected OrcLazyIntObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.intTypeEntry);
34 | }
35 |
36 | @Override
37 | public int get(Object o) {
38 | return ((IntWritable)getPrimitiveWritableObject(o)).get();
39 | }
40 |
41 | @Override
42 | public Object copyObject(Object o) {
43 | return o == null ? null : new OrcLazyInt((OrcLazyInt) o);
44 | }
45 |
46 | @Override
47 | public Object getPrimitiveJavaObject(Object o) {
48 | IntWritable writable = (IntWritable) getPrimitiveWritableObject(o);
49 | return writable == null ? null : Integer.valueOf(writable.get());
50 | }
51 |
52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
53 | public PrimitiveTypeInfo getTypeInfo() {
54 | return (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyList.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | public class OrcLazyList extends OrcLazyObject {
24 |
25 | public OrcLazyList(LazyListTreeReader treeReader) {
26 | super(treeReader);
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyListObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 |
26 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
28 | import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
29 |
30 | import com.facebook.hive.orc.OrcProto;
31 |
32 | public class OrcLazyListObjectInspector implements ListObjectInspector {
33 |
34 | private final ObjectInspector child;
35 |
36 | public OrcLazyListObjectInspector(ListTypeInfo info) {
37 | child = OrcLazyObjectInspectorUtils.createWritableObjectInspector(info.getListElementTypeInfo());
38 | }
39 |
40 | public OrcLazyListObjectInspector(int columnId, List types) {
41 | OrcProto.Type type = types.get(columnId);
42 | child = OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(0), types);
43 | }
44 |
45 | @Override
46 | public List> getList(Object data) {
47 | if (data == null) {
48 | return null;
49 | }
50 |
51 | try {
52 | return (List>) ((OrcLazyList) data).materialize();
53 | } catch (IOException e) {
54 | throw new RuntimeException(e);
55 | }
56 | }
57 |
58 | @Override
59 | public Object getListElement(Object data, int index) {
60 | List> list = getList(data);
61 |
62 | if (list == null || index < 0 || index >= list.size()) {
63 | return null;
64 | }
65 |
66 | return list.get(index);
67 | }
68 |
69 | @Override
70 | public ObjectInspector getListElementObjectInspector() {
71 | return child;
72 | }
73 |
74 | @Override
75 | public int getListLength(Object data) {
76 | if (data == null) {
77 | return -1;
78 | }
79 |
80 | return getList(data).size();
81 | }
82 |
83 | @Override
84 | public Category getCategory() {
85 | return Category.LIST;
86 | }
87 |
88 | @Override
89 | public String getTypeName() {
90 | return "array<" + child.getTypeName() + ">";
91 | }
92 |
93 | @Override
94 | public boolean equals(Object o) {
95 | if (o == null || o.getClass() != getClass()) {
96 | return false;
97 | } else if (o == this) {
98 | return true;
99 | } else {
100 | ObjectInspector other = ((OrcLazyListObjectInspector) o).child;
101 | return other.equals(child);
102 | }
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyLong.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.io.LongWritable;
24 |
25 | public class OrcLazyLong extends OrcLazyObject {
26 |
27 | public OrcLazyLong(LazyLongTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyLong(OrcLazyLong copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | previous = new LongWritable(((LongWritable)copy.previous).get());
35 | }
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyLongObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
27 | import org.apache.hadoop.io.LongWritable;
28 |
29 | public class OrcLazyLongObjectInspector extends OrcLazyPrimitiveObjectInspector
30 | implements LongObjectInspector {
31 |
32 | protected OrcLazyLongObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.longTypeEntry);
34 | }
35 |
36 | @Override
37 | public Object copyObject(Object o) {
38 | return o == null ? null : new OrcLazyLong((OrcLazyLong) o);
39 | }
40 |
41 | @Override
42 | public Object getPrimitiveJavaObject(Object o) {
43 | LongWritable writable = (LongWritable) getPrimitiveWritableObject(o);
44 | return writable == null ? null : Long.valueOf(writable.get());
45 | }
46 |
47 | @Override
48 | public long get(Object o) {
49 | return getPrimitiveWritableObject(o).get();
50 | }
51 |
52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
53 | public PrimitiveTypeInfo getTypeInfo() {
54 | return (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyMap.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | public class OrcLazyMap extends OrcLazyObject {
24 |
25 | public OrcLazyMap(LazyMapTreeReader treeReader) {
26 | super(treeReader);
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyMapObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
29 | import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
30 |
31 | import com.facebook.hive.orc.OrcProto;
32 |
33 | public class OrcLazyMapObjectInspector implements MapObjectInspector {
34 |
35 | private final ObjectInspector key;
36 | private final ObjectInspector value;
37 |
38 | public OrcLazyMapObjectInspector(MapTypeInfo info) {
39 | key = OrcLazyObjectInspectorUtils.createWritableObjectInspector(info.getMapKeyTypeInfo());
40 | value = OrcLazyObjectInspectorUtils.createWritableObjectInspector(info.getMapValueTypeInfo());
41 | }
42 |
43 | public OrcLazyMapObjectInspector(int columnId, List types) {
44 | OrcProto.Type type = types.get(columnId);
45 | key = OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(0), types);
46 | value = OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(1), types);
47 | }
48 |
49 | @Override
50 | public Map, ?> getMap(Object data) {
51 | if (data == null) {
52 | return null;
53 | }
54 |
55 | try {
56 | return (Map, ?>) ((OrcLazyMap) data).materialize();
57 | } catch (IOException e) {
58 | throw new RuntimeException(e);
59 | }
60 | }
61 |
62 | @Override
63 | public ObjectInspector getMapKeyObjectInspector() {
64 | return key;
65 | }
66 |
67 | @Override
68 | public int getMapSize(Object data) {
69 | if (data == null) {
70 | return -1;
71 | }
72 |
73 | return getMap(data).size();
74 | }
75 |
76 | @Override
77 | public Object getMapValueElement(Object data, Object key) {
78 | if (data == null) {
79 | return null;
80 | }
81 |
82 | return getMap(data).get(key);
83 | }
84 |
85 | @Override
86 | public ObjectInspector getMapValueObjectInspector() {
87 | return value;
88 | }
89 |
90 | @Override
91 | public Category getCategory() {
92 | return Category.MAP;
93 | }
94 |
95 | @Override
96 | public String getTypeName() {
97 | return "map<" + key.getTypeName() + "," + value.getTypeName() + ">";
98 | }
99 |
100 | @Override
101 | public boolean equals(Object o) {
102 | if (o == null || o.getClass() != getClass()) {
103 | return false;
104 | } else if (o == this) {
105 | return true;
106 | } else {
107 | OrcLazyMapObjectInspector other = (OrcLazyMapObjectInspector) o;
108 | return other.key.equals(key) && other.value.equals(value);
109 | }
110 | }
111 |
112 | }
113 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyPrimitiveObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 |
25 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
26 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
27 | import org.apache.hadoop.io.Writable;
28 |
29 | public abstract class OrcLazyPrimitiveObjectInspector implements PrimitiveObjectInspector {
30 |
31 | protected final transient PrimitiveTypeEntry typeEntry;
32 |
33 | protected OrcLazyPrimitiveObjectInspector(PrimitiveTypeEntry typeEntry) {
34 | this.typeEntry = typeEntry;
35 | }
36 |
37 | @SuppressWarnings("unchecked")
38 | @Override
39 | public U getPrimitiveWritableObject(Object o) {
40 | try {
41 | return o == null ? null : (U)((T)o).materialize();
42 | } catch (IOException e) {
43 | throw new RuntimeException(e);
44 | }
45 | }
46 |
47 | @Override
48 | public boolean preferWritable() {
49 | return true;
50 | }
51 |
52 | /**
53 | * Return the associated Java primitive class for this primitive
54 | * ObjectInspector.
55 | */
56 | @Override
57 | public Class> getJavaPrimitiveClass() {
58 | return typeEntry.primitiveJavaClass;
59 | }
60 |
61 | /**
62 | * Return the associated primitive category for this primitive
63 | * ObjectInspector.
64 | */
65 | @Override
66 | public PrimitiveCategory getPrimitiveCategory() {
67 | return typeEntry.primitiveCategory;
68 | }
69 |
70 | /**
71 | * Return the associated primitive Writable class for this primitive
72 | * ObjectInspector.
73 | */
74 | @Override
75 | public Class> getPrimitiveWritableClass() {
76 | return typeEntry.primitiveWritableClass;
77 | }
78 |
79 | /**
80 | * Return the associated category this primitive ObjectInspector.
81 | */
82 | @Override
83 | public Category getCategory() {
84 | return Category.PRIMITIVE;
85 | }
86 |
87 | /**
88 | * Return the type name for this primitive ObjectInspector.
89 | */
90 | @Override
91 | public String getTypeName() {
92 | return typeEntry.typeName;
93 | }
94 |
95 | /**
96 | * The precision of the underlying data.
97 | */
98 | @SuppressWarnings({"override", "UnusedDeclaration"}) // Hive 0.13
99 | public int precision() {
100 | return 0;
101 | }
102 |
103 | /**
104 | * The scale of the underlying data.
105 | */
106 | @SuppressWarnings({"override", "UnusedDeclaration"}) // Hive 0.13
107 | public int scale() {
108 | return 0;
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRow.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | import com.facebook.hive.orc.InStream;
28 | import com.facebook.hive.orc.OrcProto;
29 | import com.facebook.hive.orc.OrcStruct;
30 | import com.facebook.hive.orc.StreamName;
31 | import com.facebook.hive.orc.OrcProto.RowIndex;
32 |
33 | public class OrcLazyRow extends OrcLazyStruct {
34 |
35 | private OrcLazyObject[] fields;
36 | private final List fieldNames;
37 |
38 | public OrcLazyRow(OrcLazyObject[] fields, List fieldNames) {
39 | super(null);
40 | this.fields = fields;
41 | this.fieldNames = fieldNames;
42 | }
43 |
44 | @Override
45 | public void next() {
46 | super.next();
47 | for (OrcLazyObject field : fields) {
48 | if (field != null) {
49 | field.next();
50 | }
51 | }
52 | }
53 |
54 | @Override
55 | public void startStripe(Map streams, List encodings,
56 | RowIndex[] indexes, long rowBaseInStripe) throws IOException {
57 | for (OrcLazyObject field : fields) {
58 | if (field != null) {
59 | field.startStripe(streams, encodings, indexes, rowBaseInStripe);
60 | }
61 | }
62 | }
63 |
64 | @Override
65 | public Object materialize(long row, Object previous) throws IOException {
66 | OrcStruct previousRow;
67 | if (previous != null) {
68 | previousRow = (OrcStruct) previous;
69 | previousRow.setFieldNames(fieldNames);
70 | } else {
71 | previousRow = new OrcStruct(fieldNames);
72 | }
73 | for (int i = 0; i < fields.length; i++) {
74 | previousRow.setFieldValue(i, fields[i]);
75 | }
76 | return previousRow;
77 | }
78 |
79 | @Override
80 | public void seekToRow(long rowNumber) throws IOException {
81 | for (OrcLazyObject field : fields) {
82 | if (field != null) {
83 | field.seekToRow(rowNumber);
84 | }
85 | }
86 | }
87 |
88 | public int getNumFields() {
89 | return fields.length;
90 | }
91 |
92 | public OrcLazyObject getFieldValue(int index) {
93 | if (index >= fields.length) {
94 | return null;
95 | }
96 |
97 | return fields[index];
98 | }
99 |
100 | public void reset(OrcLazyRow other) throws IOException {
101 | this.fields = other.getRawFields();
102 | seekToRow(0);
103 | }
104 |
105 | public OrcLazyObject[] getRawFields() {
106 | return fields;
107 | }
108 |
109 | @Override
110 | public void close() throws IOException {
111 | for (OrcLazyObject field : fields) {
112 | if (field != null) {
113 | field.close();
114 | }
115 | }
116 | }
117 | }
118 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRowObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.ArrayList;
25 | import java.util.List;
26 |
27 | import com.facebook.hive.orc.OrcProto;
28 | import com.facebook.hive.orc.OrcStruct.Field;
29 | import org.apache.hadoop.hive.serde2.objectinspector.StructField;
30 | import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
31 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
32 |
33 | public class OrcLazyRowObjectInspector extends OrcLazyStructObjectInspector {
34 |
35 | public OrcLazyRowObjectInspector(StructTypeInfo info) {
36 | super(info.getAllStructFieldNames().size());
37 | ArrayList fieldNames = info.getAllStructFieldNames();
38 | ArrayList fieldTypes = info.getAllStructFieldTypeInfos();
39 | for(int i=0; i < fieldNames.size(); ++i) {
40 | fields.add(new Field(fieldNames.get(i),
41 | OrcLazyObjectInspectorUtils.createLazyObjectInspector(fieldTypes.get(i)), i));
42 | }
43 | }
44 |
45 | public OrcLazyRowObjectInspector(int columnId, List types) {
46 | super(types.get(columnId).getSubtypesCount());
47 | OrcProto.Type type = types.get(columnId);
48 | int fieldCount = type.getSubtypesCount();
49 | for(int i=0; i < fieldCount; ++i) {
50 | int fieldType = type.getSubtypes(i);
51 | fields.add(new Field(type.getFieldNames(i),
52 | OrcLazyObjectInspectorUtils.createLazyObjectInspector(fieldType, types), i));
53 | }
54 | }
55 |
56 | @Override
57 | public Object getStructFieldData(Object data, StructField fieldRef) {
58 | int offset = ((Field) fieldRef).getOffset();
59 |
60 | try {
61 | OrcLazyObject obj = ((OrcLazyRow) data).getFieldValue(offset);
62 | if (obj != null) {
63 | obj.materialize();
64 | return obj.nextIsNull() ? null : obj;
65 | }
66 | return null;
67 | } catch (IOException e) {
68 | throw new RuntimeException(e);
69 | }
70 | }
71 |
72 | @Override
73 | public StructField getStructFieldRef(String fieldName) {
74 | for(StructField field: fields) {
75 | if (field.getFieldName().equals(fieldName)) {
76 | return field;
77 | }
78 | }
79 | return null;
80 | }
81 |
82 | @Override
83 | public List getStructFieldsDataAsList(Object data) {
84 | OrcLazyRow row = (OrcLazyRow) data;
85 | int numFields = row.getNumFields();
86 | List result = new ArrayList(numFields);
87 | for (int i = 0; i < numFields; i++) {
88 | try {
89 | OrcLazyObject obj = (OrcLazyObject) row.getFieldValue(i);
90 | result.add(obj == null || obj.nextIsNull() ? null : obj);
91 | } catch (IOException e) {
92 | throw new RuntimeException(e);
93 | }
94 |
95 | }
96 | return result;
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyShort.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.io.ShortWritable;
24 |
25 | public class OrcLazyShort extends OrcLazyObject {
26 |
27 | public OrcLazyShort(LazyShortTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyShort(OrcLazyShort copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | previous = new ShortWritable(((ShortWritable)copy.previous).get());
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyShortObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.io.ShortWritable;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
25 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
26 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
27 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
28 |
29 | public class OrcLazyShortObjectInspector extends OrcLazyPrimitiveObjectInspector
30 | implements ShortObjectInspector {
31 |
32 | protected OrcLazyShortObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.shortTypeEntry);
34 | }
35 |
36 | @Override
37 | public short get(Object o) {
38 | return ((ShortWritable)getPrimitiveWritableObject(o)).get();
39 | }
40 |
41 | @Override
42 | public Object copyObject(Object o) {
43 | return o == null ? null : new OrcLazyShort((OrcLazyShort) o);
44 | }
45 |
46 | @Override
47 | public Object getPrimitiveJavaObject(Object o) {
48 | ShortWritable writable = getPrimitiveWritableObject(o);
49 | return writable == null ? null : Short.valueOf(writable.get());
50 | }
51 |
52 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
53 | public PrimitiveTypeInfo getTypeInfo() {
54 | return (PrimitiveTypeInfo) TypeInfoFactory.shortTypeInfo;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyString.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.io.Text;
24 |
25 | public class OrcLazyString extends OrcLazyObject {
26 |
27 | public OrcLazyString(LazyStringTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyString(OrcLazyString copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | previous = new Text((Text)copy.previous);
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStringObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
24 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
25 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
26 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
27 | import org.apache.hadoop.io.Text;
28 |
29 | public class OrcLazyStringObjectInspector extends OrcLazyPrimitiveObjectInspector
30 | implements StringObjectInspector {
31 |
32 | protected OrcLazyStringObjectInspector() {
33 | super(PrimitiveObjectInspectorUtils.stringTypeEntry);
34 | }
35 |
36 | @Override
37 | public String getPrimitiveJavaObject(Object o) {
38 | Text text = getPrimitiveWritableObject(o);
39 | return text == null ? null : text.toString();
40 | }
41 |
42 | @Override
43 | public Object copyObject(Object o) {
44 | return o == null ? null : new OrcLazyString((OrcLazyString) o);
45 | }
46 |
47 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
48 | public PrimitiveTypeInfo getTypeInfo() {
49 | return (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo;
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStruct.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | public class OrcLazyStruct extends OrcLazyObject {
24 |
25 | public OrcLazyStruct(LazyStructTreeReader treeReader) {
26 | super(treeReader);
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestamp.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import org.apache.hadoop.hive.serde2.io.TimestampWritable;
24 |
25 | public class OrcLazyTimestamp extends OrcLazyObject {
26 |
27 | public OrcLazyTimestamp(LazyTimestampTreeReader treeReader) {
28 | super(treeReader);
29 | }
30 |
31 | public OrcLazyTimestamp(OrcLazyTimestamp copy) {
32 | super(copy);
33 | if (copy.previous != null) {
34 | previous = new TimestampWritable(((TimestampWritable)copy.previous));
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestampObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.sql.Timestamp;
24 |
25 | import org.apache.hadoop.hive.serde2.io.TimestampWritable;
26 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
27 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
28 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
29 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
30 |
31 | public class OrcLazyTimestampObjectInspector extends
32 | OrcLazyPrimitiveObjectInspector implements TimestampObjectInspector {
33 |
34 | protected OrcLazyTimestampObjectInspector() {
35 | super(PrimitiveObjectInspectorUtils.timestampTypeEntry);
36 | }
37 |
38 | @Override
39 | public Timestamp getPrimitiveJavaObject(Object o) {
40 | TimestampWritable writable = getPrimitiveWritableObject(o);
41 | return writable == null ? null : writable.getTimestamp();
42 | }
43 |
44 | @Override
45 | public Object copyObject(Object o) {
46 | return o == null ? null : new OrcLazyTimestamp((OrcLazyTimestamp) o);
47 | }
48 |
49 | @SuppressWarnings({"override", "UnusedDeclaration", "RedundantCast"}) // FB Hive
50 | public PrimitiveTypeInfo getTypeInfo() {
51 | return (PrimitiveTypeInfo) TypeInfoFactory.timestampTypeInfo;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyUnion.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | public class OrcLazyUnion extends OrcLazyObject {
24 |
25 | public OrcLazyUnion(LazyUnionTreeReader treeReader) {
26 | super(treeReader);
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/lazy/OrcLazyUnionObjectInspector.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package com.facebook.hive.orc.lazy;
22 |
23 | import java.io.IOException;
24 | import java.util.ArrayList;
25 | import java.util.List;
26 |
27 | import com.facebook.hive.orc.OrcProto;
28 | import com.facebook.hive.orc.OrcUnion;
29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
30 | import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
31 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
32 | import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
33 |
34 | public class OrcLazyUnionObjectInspector implements UnionObjectInspector {
35 |
36 | private final List children;
37 |
38 | public OrcLazyUnionObjectInspector(int columnId, List types) {
39 | OrcProto.Type type = types.get(columnId);
40 | children = new ArrayList(type.getSubtypesCount());
41 | for(int i=0; i < type.getSubtypesCount(); ++i) {
42 | children.add(OrcLazyObjectInspectorUtils.createWritableObjectInspector(type.getSubtypes(i),
43 | types));
44 | }
45 | }
46 |
47 | public OrcLazyUnionObjectInspector(UnionTypeInfo info) {
48 | List unionChildren = info.getAllUnionObjectTypeInfos();
49 | this.children = new ArrayList(unionChildren.size());
50 | for(TypeInfo child: info.getAllUnionObjectTypeInfos()) {
51 | this.children.add(OrcLazyObjectInspectorUtils.createWritableObjectInspector(child));
52 | }
53 | }
54 |
55 | private OrcUnion get(Object o) {
56 | if (o == null) {
57 | return null;
58 | }
59 |
60 | try {
61 | return (OrcUnion) ((OrcLazyUnion) o).materialize();
62 | } catch (IOException e) {
63 | throw new RuntimeException(e);
64 | }
65 | }
66 |
67 | @Override
68 | public Object getField(Object o) {
69 | if (o == null) {
70 | return null;
71 | }
72 |
73 | return get(o).getObject();
74 | }
75 |
76 | @Override
77 | public List getObjectInspectors() {
78 | return children;
79 | }
80 |
81 | @Override
82 | public byte getTag(Object o) {
83 | if (o == null) {
84 | return -1;
85 | }
86 |
87 | return get(o).getTag();
88 | }
89 |
90 | @Override
91 | public Category getCategory() {
92 | return Category.UNION;
93 | }
94 |
95 | @Override
96 | public String getTypeName() {
97 | StringBuilder builder = new StringBuilder("uniontype<");
98 | boolean first = true;
99 | for(ObjectInspector child: children) {
100 | if (first) {
101 | first = false;
102 | } else {
103 | builder.append(",");
104 | }
105 | builder.append(child.getTypeName());
106 | }
107 | builder.append(">");
108 | return builder.toString();
109 | }
110 |
111 | @Override
112 | public boolean equals(Object o) {
113 | if (o == null || o.getClass() != getClass()) {
114 | return false;
115 | } else if (o == this) {
116 | return true;
117 | } else {
118 | List other = ((OrcLazyUnionObjectInspector) o).children;
119 | if (other.size() != children.size()) {
120 | return false;
121 | }
122 | for(int i = 0; i < children.size(); ++i) {
123 | if (!other.get(i).equals(children.get(i))) {
124 | return false;
125 | }
126 | }
127 | return true;
128 | }
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/package-info.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
/**
 * The Optimized Row Columnar (ORC) File Format.
 *
 * This format:
 * <ul>
 *   <li>Decomposes complex column types into primitives</li>
 *   <li>Uses type-specific encoders for each column
 *     <ul>
 *       <li>Dictionary encodings for low cardinality columns</li>
 *       <li>Run length encoding of data</li>
 *       <li>Variable length encoding of integers</li>
 *     </ul>
 *   </li>
 *   <li>Divides file into large stripes</li>
 *   <li>Each stripe includes light-weight indexes that enable the reader to
 *     skip large sets of rows that don't satisfy the filter condition</li>
 *   <li>A file footer that contains meta-information about file
 *     <ul>
 *       <li>Precise byte range for each stripe</li>
 *       <li>Type information for the file</li>
 *       <li>Any user meta-information</li>
 *     </ul>
 *   </li>
 *   <li>Seek to row number is implemented to support secondary indexes</li>
 *   <li>Support for additional generic compression: LZO, SNAPPY, ZLIB.</li>
 * </ul>
 *
 * <p>
 * <b>Format:</b>
 * <pre>
 * {@code
 * HEADER (3 bytes) "ORC"
 * STRIPE (0 or more stripes)
 * FILE-FOOTER
 * POST SCRIPT
 * PS LENGTH (1 byte)
 * }
 * </pre>
 * </p>
 *
 * <p>
 * <b>Stripe:</b>
 * <pre>
 * {@code
 * INDEX-STREAM (0 or more)
 * DATA-STREAM (0 or more)
 * STRIPE-FOOTER
 * }
 * </pre>
 * </p>
 */
72 | package com.facebook.hive.orc;
73 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/BooleanColumnStatistics.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc.statistics;
21 |
22 | /**
23 | * Statistics for boolean columns.
24 | */
25 | public interface BooleanColumnStatistics extends ColumnStatistics {
26 | long getFalseCount();
27 |
28 | long getTrueCount();
29 | }
30 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/ColumnStatistics.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc.statistics;
21 |
/**
 * The statistics every column provides, whatever its type.
 */
public interface ColumnStatistics {

  /**
   * Reports how many values the column holds. Because of NULL values and
   * repeated values, this differs from the number of rows.
   *
   * @return the number of values
   */
  long getNumberOfValues();
}
33 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/DoubleColumnStatistics.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc.statistics;
21 |
/**
 * Statistics for float and double columns.
 *
 * <p>Adds minimum, maximum and sum accessors on top of the value count
 * inherited from {@link ColumnStatistics}.
 */
public interface DoubleColumnStatistics extends ColumnStatistics {

  /**
   * Get the smallest value in the column. Only defined if
   * {@link #getNumberOfValues()} is non-zero.
   *
   * @return the minimum
   */
  double getMinimum();

  /**
   * Get the largest value in the column. Only defined if
   * {@link #getNumberOfValues()} is non-zero.
   *
   * @return the maximum
   */
  double getMaximum();

  /**
   * Get the sum of the values in the column.
   *
   * <p>NOTE(review): unlike the integer statistics interface there is no
   * "is the sum defined" accessor here; how overflow or an empty column is
   * reported is not visible from this interface - confirm in the
   * implementation.
   *
   * @return the sum
   */
  double getSum();
}
47 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/IntegerColumnStatistics.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc.statistics;
21 |
/**
 * Statistics for all of the integer columns, such as byte, short, int, and
 * long.
 *
 * <p>All values are exposed as {@code long}, whatever the width of the
 * underlying column type.
 */
public interface IntegerColumnStatistics extends ColumnStatistics {
  /**
   * Get the smallest value in the column. Only defined if
   * {@link #getNumberOfValues()} is non-zero.
   *
   * @return the minimum
   */
  long getMinimum();

  /**
   * Get the largest value in the column. Only defined if
   * {@link #getNumberOfValues()} is non-zero.
   *
   * @return the maximum
   */
  long getMaximum();

  /**
   * Is the sum defined? If the sum overflowed the counter this will be false.
   * Callers should check this before relying on {@link #getSum()}.
   *
   * @return is the sum available
   */
  boolean isSumDefined();

  /**
   * Get the sum of the column. Only valid if {@link #isSumDefined()} returns
   * true.
   *
   * @return the sum of the column
   */
  long getSum();
}
53 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/java/com/facebook/hive/orc/statistics/StringColumnStatistics.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc.statistics;
21 |
/**
 * Statistics for string columns.
 *
 * <p>Adds minimum and maximum accessors on top of the value count inherited
 * from {@link ColumnStatistics}.
 */
public interface StringColumnStatistics extends ColumnStatistics {
  /**
   * Get the minimum string.
   *
   * <p>NOTE(review): presumably only meaningful when
   * {@link #getNumberOfValues()} is non-zero, as documented for the numeric
   * statistics; the value returned for an empty column and the ordering used
   * are not visible here - confirm in the implementation.
   *
   * @return the minimum
   */
  String getMinimum();

  /**
   * Get the maximum string.
   *
   * <p>NOTE(review): same caveats as {@link #getMinimum()} - confirm the
   * behavior for an empty column.
   *
   * @return the maximum
   */
  String getMaximum();
}
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/main/resources/protobuf/com/facebook/hive/orc/orc_proto.proto:
--------------------------------------------------------------------------------
1 | package com.facebook.hive.orc;
2 |
// Minimum, maximum and sum for an integer column. Every field is optional;
// sint64 is protobuf's ZigZag-encoded signed 64-bit varint, which keeps
// small negative values compact.
// NOTE(review): sum is presumably left unset when the running sum overflowed
// (cf. IntegerColumnStatistics.isSumDefined) - confirm against the writer.
message IntegerStatistics {
  optional sint64 minimum = 1;
  optional sint64 maximum = 2;
  optional sint64 sum = 3;
}
8 |
// Double-valued minimum, maximum and sum for a column. Every field is
// optional.
message DoubleStatistics {
  optional double minimum = 1;
  optional double maximum = 2;
  optional double sum = 3;
}
14 |
// Optional minimum and maximum string values for a column.
message StringStatistics {
  optional string minimum = 1;
  optional string maximum = 2;
}
19 |
// A packed list of unsigned 64-bit counts.
// NOTE(review): what each position in the list represents is not defined in
// this file - confirm in the statistics implementation.
message BucketStatistics {
  repeated uint64 count = 1 [packed=true];
}
23 |
// Statistics for one column: a value count plus optional type-specific
// sub-messages.
// NOTE(review): presumably at most one of the typed sub-messages is populated
// for any given column - confirm against the writer.
message ColumnStatistics {
  optional uint64 numberOfValues = 1;
  optional IntegerStatistics intStatistics = 2;
  optional DoubleStatistics doubleStatistics = 3;
  optional StringStatistics stringStatistics = 4;
  optional BucketStatistics bucketStatistics = 5;
}
31 |
// One entry of a row index: a packed list of positions and optional column
// statistics.
// NOTE(review): the positions are presumably the recorded stream positions
// needed to seek to this entry - confirm in the reader.
message RowIndexEntry {
  repeated uint64 positions = 1 [packed=true];
  optional ColumnStatistics statistics = 2;
}
36 |
// A row index: an ordered list of RowIndexEntry messages.
message RowIndex {
  repeated RowIndexEntry entry = 1;
}
40 |
// Describes a single stream: its kind (required), and optionally the column
// it belongs to, its length, and the useVInts flag (defaults to true when
// the field is absent).
message Stream {
  // If you add new index stream kinds, you need to make sure to update
  // StreamName to ensure it is added to the stripe in the right area.
  enum Kind {
    PRESENT = 0;
    DATA = 1;
    LENGTH = 2;
    DICTIONARY_DATA = 3;
    DICTIONARY_COUNT = 4;
    NANO_DATA = 5;
    ROW_INDEX = 6;
    IN_DICTIONARY = 7;
    STRIDE_DICTIONARY = 8;
    STRIDE_DICTIONARY_LENGTH = 9;
  }
  required Kind kind = 1;
  optional uint32 column = 2;
  optional uint64 length = 3;
  optional bool useVInts = 4 [default = true];
}
61 |
// How a column is encoded: DIRECT or DICTIONARY (required), with an optional
// dictionary size.
// NOTE(review): dictionarySize is presumably only set for DICTIONARY
// encodings - confirm against the writer.
message ColumnEncoding {
  enum Kind {
    DIRECT = 0;
    DICTIONARY = 1;
  }
  required Kind kind = 1;
  optional uint32 dictionarySize = 2;
}
70 |
// Footer of a stripe: the list of its streams and the list of column
// encodings.
message StripeFooter {
  repeated Stream streams = 1;
  repeated ColumnEncoding columns = 2;
}
75 |
// One type in the file schema: its kind (required), a packed list of subtype
// ids, and a list of field names.
// NOTE(review): subtypes presumably index into Footer.types, and fieldNames
// presumably applies to STRUCT types - confirm in the reader.
message Type {
  enum Kind {
    BOOLEAN = 0;
    BYTE = 1;
    SHORT = 2;
    INT = 3;
    LONG = 4;
    FLOAT = 5;
    DOUBLE = 6;
    STRING = 7;
    BINARY = 8;
    TIMESTAMP = 9;
    LIST = 10;
    MAP = 11;
    STRUCT = 12;
    UNION = 13;
  }
  required Kind kind = 1;
  repeated uint32 subtypes = 2 [packed=true];
  repeated string fieldNames = 3;
}
97 |
// Summary of one stripe: its offset, the lengths of its index, data and
// footer sections, its row count and its raw data size. All fields are
// optional.
// NOTE(review): offset and the lengths are presumably in bytes - confirm.
message StripeInformation {
  optional uint64 offset = 1;
  optional uint64 indexLength = 2;
  optional uint64 dataLength = 3;
  optional uint64 footerLength = 4;
  optional uint64 numberOfRows = 5;
  optional uint64 rawDataSize = 6;
}
106 |
// One user metadata entry: a name and an opaque bytes value, both required.
message UserMetadataItem {
  required string name = 1;
  required bytes value = 2;
}
111 |
// File footer: header and content lengths, the list of stripes, the schema
// types, user metadata, the total row count, per-column statistics, the row
// index stride and the raw data size.
message Footer {
  optional uint64 headerLength = 1;
  optional uint64 contentLength = 2;
  repeated StripeInformation stripes = 3;
  repeated Type types = 4;
  repeated UserMetadataItem metadata = 5;
  optional uint64 numberOfRows = 6;
  repeated ColumnStatistics statistics = 7;
  optional uint32 rowIndexStride = 8;
  optional uint64 rawDataSize = 9;
}
123 |
// The compression kinds that can be recorded in the PostScript.
enum CompressionKind {
  NONE = 0;
  ZLIB = 1;
  SNAPPY = 2;
  LZO = 3;
}
130 |
// Serialized length must be less than 255 bytes.
// Records the footer length, the compression kind and the compression block
// size; all fields are optional.
message PostScript {
  optional uint64 footerLength = 1;
  optional CompressionKind compression = 2;
  optional uint64 compressionBlockSize = 3;
}
137 |
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/MemoryManagerWithForce.java:
--------------------------------------------------------------------------------
1 | package com.facebook.hive.orc;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 |
5 | import java.io.IOException;
6 |
7 | /**
8 | *
9 | * MemoryManagerWithForce.
10 | *
11 | * An implementation of MemoryManager with the ability to force writers to flush their stripes
12 | * and to enter low memory mode.
13 | */
14 | public class MemoryManagerWithForce extends MemoryManager {
15 |
16 | MemoryManagerWithForce(Configuration conf) {
17 | super(conf);
18 | }
19 |
20 | public void forceEnterLowMemoryMode() throws IOException {
21 | for (WriterInfo writer : writerList.values()) {
22 | writer.getCallback().enterLowMemoryMode();
23 | }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/TestColumnStatistics.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved.
2 |
3 | /**
4 | * Licensed to the Apache Software Foundation (ASF) under one
5 | * or more contributor license agreements. See the NOTICE file
6 | * distributed with this work for additional information
7 | * regarding copyright ownership. The ASF licenses this file
8 | * to you under the Apache License, Version 2.0 (the
9 | * "License"); you may not use this file except in compliance
10 | * with the License. You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package com.facebook.hive.orc;
21 |
22 | import com.facebook.hive.orc.statistics.ColumnStatisticsImpl;
23 | import com.facebook.hive.orc.statistics.DoubleColumnStatistics;
24 | import com.facebook.hive.orc.statistics.IntegerColumnStatistics;
25 | import com.facebook.hive.orc.statistics.StringColumnStatistics;
26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
27 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
28 | import org.junit.Test;
29 |
30 | import static junit.framework.Assert.assertEquals;
31 |
32 | /**
33 | * Test ColumnStatisticsImpl for ORC.
34 | */
35 | public class TestColumnStatistics {
36 |
37 | @Test
38 | public void testIntegerStatisticsMerge() throws Exception {
39 | ObjectInspector inspector =
40 | PrimitiveObjectInspectorFactory.javaIntObjectInspector;
41 |
42 | ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
43 | ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
44 | stats1.updateInteger(10);
45 | stats1.updateInteger(10);
46 | stats2.updateInteger(1);
47 | stats2.updateInteger(1000);
48 | stats1.merge(stats2);
49 | IntegerColumnStatistics typed = (IntegerColumnStatistics) stats1;
50 | assertEquals(1, typed.getMinimum());
51 | assertEquals(1000, typed.getMaximum());
52 | }
53 |
54 | @Test
55 | public void testDoubleStatisticsMerge() throws Exception {
56 | ObjectInspector inspector =
57 | PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
58 |
59 | ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
60 | ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
61 | stats1.updateDouble(10.0);
62 | stats1.updateDouble(100.0);
63 | stats2.updateDouble(1.0);
64 | stats2.updateDouble(1000.0);
65 | stats1.merge(stats2);
66 | DoubleColumnStatistics typed = (DoubleColumnStatistics) stats1;
67 | assertEquals(1.0, typed.getMinimum(), 0.001);
68 | assertEquals(1000.0, typed.getMaximum(), 0.001);
69 | }
70 |
71 | @Test
72 | public void testStringStatisticsMerge() throws Exception {
73 | ObjectInspector inspector =
74 | PrimitiveObjectInspectorFactory.javaStringObjectInspector;
75 |
76 | ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
77 | ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
78 | stats1.updateString("bob");
79 | stats1.updateString("david");
80 | stats1.updateString("charles");
81 | stats2.updateString("anne");
82 | stats2.updateString("erin");
83 | stats1.merge(stats2);
84 | StringColumnStatistics strStats = (StringColumnStatistics) stats1;
85 | assertEquals("anne", strStats.getMinimum());
86 | assertEquals("erin", strStats.getMaximum());
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/TestDynamicArray.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.facebook.hive.orc;
19 |
20 | import static org.junit.Assert.assertEquals;
21 | import static org.junit.Assert.assertTrue;
22 |
23 | import java.util.Random;
24 |
25 | import org.junit.Test;
26 |
27 | public class TestDynamicArray {
28 |
29 | @Test
30 | public void testByteArray() throws Exception {
31 | DynamicByteArray dba = new DynamicByteArray(10, new MemoryEstimate());
32 | dba.add((byte) 0);
33 | dba.add((byte) 1);
34 | dba.set(3, (byte) 3);
35 | dba.set(2, (byte) 2);
36 | dba.add((byte) 4);
37 | assertEquals("{0,1,2,3,4}", dba.toString());
38 | assertEquals(5, dba.size());
39 | byte[] val;
40 | val = new byte[0];
41 |
42 | assertEquals(0, dba.compare(val, 0, 0, 2, 0));
43 | assertTrue(dba.compare(val, 0, 0, 2, 1) < 0);
44 | val = new byte[]{3,42};
45 | assertTrue(dba.compare(val, 0, 1, 2, 0) > 0);
46 | assertTrue(dba.compare(val, 0, 1, 2, 1) > 0);
47 | assertEquals(0, dba.compare(val, 0, 1, 3, 1));
48 | assertTrue(dba.compare(val, 0, 1, 3, 2) < 0);
49 | assertTrue(dba.compare(val, 0, 2, 3, 1) > 0);
50 | val = new byte[256];
51 | for(int b=-128; b < 128; ++b) {
52 | dba.add((byte) b);
53 | val[b+128] = (byte) b;
54 | }
55 | assertEquals(0, dba.compare(val, 0, 256, 5, 256));
56 | assertTrue(dba.compare(val, 0, 1, 0, 1) > 0);
57 | assertTrue(dba.compare(val, 254, 1, 0, 1) > 0);
58 | assertTrue(dba.compare(val, 120, 1, 64, 1) > 0);
59 | val = new byte[1024];
60 | Random rand = new Random(1701);
61 | for(int i = 0; i < val.length; ++i) {
62 | rand.nextBytes(val);
63 | }
64 | dba.add(val, 0, 1024);
65 | assertEquals(1285, dba.size());
66 | assertEquals(0, dba.compare(val, 0, 1024, 261, 1024));
67 | }
68 |
69 | @Test
70 | public void testIntArray() throws Exception {
71 | DynamicIntArray dia = new DynamicIntArray(10, new MemoryEstimate());
72 | for(int i=0; i < 10000; ++i) {
73 | dia.add(2*i);
74 | }
75 | assertEquals(10000, dia.size());
76 | for(int i=0; i < 10000; ++i) {
77 | assertEquals(2*i, dia.get(i));
78 | }
79 | dia.clear();
80 | assertEquals(0, dia.size());
81 | dia.add(3);
82 | dia.add(12);
83 | dia.add(65);
84 | assertEquals("{3,12,65}", dia.toString());
85 | for(int i=0; i < 5; ++i) {
86 | dia.increment(i, 3);
87 | }
88 | assertEquals("{6,15,68,3,3}", dia.toString());
89 | }
90 |
91 | @Test
92 | public void testLongArray() throws Exception {
93 | DynamicLongArray dia = new DynamicLongArray(10, new MemoryEstimate());
94 | for(int i=0; i < 10000; ++i) {
95 | dia.add((long)(2*i) + (long)Integer.MAX_VALUE);
96 | }
97 | assertEquals(10000, dia.size());
98 | for(int i=0; i < 10000; ++i) {
99 | assertEquals((long)(2*i) + (long)Integer.MAX_VALUE, dia.get(i));
100 | }
101 | dia.clear();
102 | assertEquals(0, dia.size());
103 | dia.add(3);
104 | dia.add(12);
105 | dia.add(65);
106 | assertEquals("{3,12,65}", dia.toString());
107 | for(int i=0; i < 5; ++i) {
108 | dia.increment(i, 3);
109 | }
110 | assertEquals("{6,15,68,3,3}", dia.toString());
111 |
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/TestSerializationUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.facebook.hive.orc;
19 |
20 | import org.junit.Test;
21 |
22 | import java.io.ByteArrayInputStream;
23 | import java.io.ByteArrayOutputStream;
24 |
25 | import static org.junit.Assert.assertEquals;
26 |
27 | public class TestSerializationUtils {
28 |
29 | @Test
30 | public void TestDoubles() throws Exception {
31 | ByteArrayOutputStream buffer = new ByteArrayOutputStream();
32 | SerializationUtils.writeDouble(buffer, 1343822337.759);
33 | assertEquals(1343822337.759,
34 | SerializationUtils.readDouble(new
35 | ByteArrayInputStream(buffer.toByteArray())), 0.0001);
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/TestStreamName.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.facebook.hive.orc;
20 |
21 | import static org.junit.Assert.assertEquals;
22 |
23 | import org.junit.Test;
24 |
25 | public class TestStreamName {
26 |
27 | @Test
28 | public void test1() throws Exception {
29 | StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
30 | StreamName s2 = new StreamName(3,
31 | OrcProto.Stream.Kind.DICTIONARY_DATA);
32 | StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
33 | StreamName s4 = new StreamName(5,
34 | OrcProto.Stream.Kind.DICTIONARY_DATA);
35 | StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
36 | assertEquals(true, s1.equals(s1));
37 | assertEquals(false, s1.equals(s2));
38 | assertEquals(false, s1.equals(s3));
39 | assertEquals(true, s1.equals(s1p));
40 | assertEquals(true, s1.compareTo(null) < 0);
41 | assertEquals(false, s1.equals(null));
42 | assertEquals(true, s1.compareTo(s2) > 0);
43 | assertEquals(true, s2.compareTo(s3) < 0);
44 | assertEquals(true, s3.compareTo(s4) > 0);
45 | assertEquals(true, s4.compareTo(s1p) < 0);
46 | assertEquals(0, s1p.compareTo(s1));
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/TestZlib.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.facebook.hive.orc;
20 |
21 | import com.facebook.hive.orc.compression.CompressionCodec;
22 | import com.facebook.hive.orc.compression.ZlibCodec;
23 | import org.junit.Test;
24 |
25 | import java.io.IOException;
26 | import java.nio.ByteBuffer;
27 |
28 | import static junit.framework.Assert.assertEquals;
29 | import static junit.framework.Assert.fail;
30 |
31 | public class TestZlib {
32 |
33 | @Test
34 | public void testNoOverflow() throws Exception {
35 | ByteBuffer in = ByteBuffer.allocate(10);
36 | ByteBuffer out = ByteBuffer.allocate(10);
37 | in.put(new byte[]{1,2,3,4,5,6,7,10});
38 | in.flip();
39 | CompressionCodec codec = new ZlibCodec();
40 | assertEquals(false, codec.compress(in, out, null));
41 | }
42 |
43 | @Test
44 | public void testCorrupt() throws Exception {
45 | ByteBuffer buf = ByteBuffer.allocate(1000);
46 | buf.put(new byte[]{127,-128,0,99,98,-1});
47 | buf.flip();
48 | CompressionCodec codec = new ZlibCodec();
49 | ByteBuffer out = ByteBuffer.allocate(1000);
50 | try {
51 | codec.decompress(buf, out);
52 | fail();
53 | } catch (IOException ioe) {
54 | // EXPECTED
55 | }
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/hive-dwrf/src/test/java/com/facebook/hive/orc/WriterImplWithForceFlush.java:
--------------------------------------------------------------------------------
1 | package com.facebook.hive.orc;
2 |
3 | import com.facebook.hive.orc.compression.CompressionKind;
4 | import org.apache.hadoop.conf.Configuration;
5 | import org.apache.hadoop.fs.FileSystem;
6 | import org.apache.hadoop.fs.Path;
7 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
8 |
9 | import java.io.IOException;
10 |
11 | public class WriterImplWithForceFlush extends WriterImpl {
12 | public WriterImplWithForceFlush(FileSystem fs, Path path, Configuration conf,
13 | ObjectInspector inspector, long stripeSize, CompressionKind compress, int bufferSize,
14 | int rowIndexStride, MemoryManager memoryManager) throws IOException {
15 | super(fs, path, conf, inspector, stripeSize, compress, bufferSize, rowIndexStride,
16 | memoryManager);
17 | }
18 |
19 | public void forceFlushStripe() throws IOException {
20 | flushStripe();
21 | }
22 | }
23 |
--------------------------------------------------------------------------------