├── logo ├── parquet-logos.pdf ├── parquet-logos_1.svg ├── parquet-logos_2.svg └── parquet-logos_3.svg ├── doc └── images │ ├── FileFormat.gif │ ├── FileLayout.gif │ ├── PageIndexLayout.png │ ├── FileLayoutBloomFilter1.png │ ├── FileLayoutBloomFilter2.png │ ├── FileLayoutEncryptionEF.png │ └── FileLayoutEncryptionPF.png ├── NOTICE ├── .gitignore ├── Makefile ├── .github └── PULL_REQUEST_TEMPLATE.md ├── licenses └── LICENSE.slf4j.txt ├── .travis.yml ├── CONTRIBUTING.md ├── dev ├── finalize-release ├── prepare-release.sh ├── source-release.sh ├── README.md └── merge_parquet_pr.py ├── src ├── main │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── parquet │ │ │ └── format │ │ │ ├── event │ │ │ ├── FieldConsumer.java │ │ │ ├── EventBasedThriftReader.java │ │ │ ├── Consumers.java │ │ │ └── TypedConsumer.java │ │ │ ├── LogicalTypes.java │ │ │ ├── InterningProtocol.java │ │ │ └── Util.java │ └── resources │ │ └── META-INF │ │ └── LICENSE └── test │ └── java │ └── org │ └── apache │ └── parquet │ └── format │ └── TestUtil.java ├── changelog.sh ├── PageIndex.md ├── pom.xml ├── LICENSE ├── CHANGES.md ├── README.md ├── Encodings.md ├── BloomFilter.md └── KEYS /logo/parquet-logos.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/logo/parquet-logos.pdf -------------------------------------------------------------------------------- /doc/images/FileFormat.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/doc/images/FileFormat.gif -------------------------------------------------------------------------------- /doc/images/FileLayout.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/doc/images/FileLayout.gif -------------------------------------------------------------------------------- /doc/images/PageIndexLayout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/doc/images/PageIndexLayout.png -------------------------------------------------------------------------------- /doc/images/FileLayoutBloomFilter1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/doc/images/FileLayoutBloomFilter1.png -------------------------------------------------------------------------------- /doc/images/FileLayoutBloomFilter2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/doc/images/FileLayoutBloomFilter2.png -------------------------------------------------------------------------------- /doc/images/FileLayoutEncryptionEF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/doc/images/FileLayoutEncryptionEF.png -------------------------------------------------------------------------------- /doc/images/FileLayoutEncryptionPF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/parquet-format/master/doc/images/FileLayoutEncryptionPF.png -------------------------------------------------------------------------------- /NOTICE:
-------------------------------------------------------------------------------- 1 | 2 | Apache Parquet Format 3 | Copyright 2014 The Apache Software Foundation 4 | 5 | This product includes software developed at 6 | The Apache Software Foundation (http://www.apache.org/). 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | generated/* 19 | target 20 | dependency-reduced-pom.xml 21 | .classpath 22 | .project 23 | 24 | # IDE stuff 25 | .idea/ 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | .PHONY: doc 21 | 22 | thrift: 23 | mkdir -p generated 24 | thrift --gen cpp -o generated src/main/thrift/parquet.thrift 25 | thrift --gen java -o generated src/main/thrift/parquet.thrift 26 | 27 | %.html: %.md 28 | pandoc -f markdown_github -t html -o $@ $< 29 | 30 | doc: README.html PageIndex.html LogicalTypes.html 31 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Make sure you have checked _all_ steps below. 2 | 3 | ### Jira 4 | 5 | - [ ] My PR addresses the following [Parquet Jira](https://issues.apache.org/jira/browse/PARQUET/) issues and references them in the PR title. For example, "PARQUET-1234: My Parquet PR" 6 | - https://issues.apache.org/jira/browse/PARQUET-XXX 7 | - In case you are adding a dependency, check if the license complies with the [ASF 3rd Party License Policy](https://www.apache.org/legal/resolved.html#category-x). 
8 | 9 | ### Commits 10 | 11 | - [ ] My commits all reference Jira issues in their subject lines. In addition, my commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git-commit/)": 12 | 1. Subject is separated from body by a blank line 13 | 1. Subject is limited to 50 characters (not including Jira issue reference) 14 | 1. Subject does not end with a period 15 | 1. Subject uses the imperative mood ("add", not "adding") 16 | 1. Body wraps at 72 characters 17 | 1. Body explains "what" and "why", not "how" 18 | 19 | ### Documentation 20 | 21 | - [ ] In case of new functionality, my PR adds documentation that describes how to use it. 22 | - All the public functions and the classes in the PR contain Javadoc that explain what it does 23 | -------------------------------------------------------------------------------- /licenses/LICENSE.slf4j.txt: -------------------------------------------------------------------------------- 1 | The following is a copy of the SLF4J license: 2 | 3 | Copyright (c) 2004-2013 QOS.ch 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 21 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | language: java 19 | 20 | jdk: 21 | - openjdk8 22 | - openjdk11 23 | 24 | before_install: 25 | - sudo apt-get update -qq 26 | - sudo apt-get install -qq protobuf-compiler 27 | - sudo apt-get install -qq libboost-dev libboost-test-dev libboost-program-options-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev 28 | - wget -qO- https://archive.apache.org/dist/thrift/0.12.0/thrift-0.12.0.tar.gz | tar zxf - 29 | - cd thrift-0.12.0/ 30 | - chmod +x ./configure 31 | - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang --without-php --without-nodejs --without-java 32 | - sudo make install 33 | - cd .. 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | Recommendations and requirements for how to best contribute to Parquet. We strive to obey these as best as possible. As always, thanks for contributing--we hope these guidelines make it easier and shed some light on our approach and processes. 21 | 22 | ### Key branches 23 | - `master` has the latest stable changes 24 | 25 | ### Pull requests 26 | - Submit pull requests against the `master` branch 27 | - Try not to pollute your pull request with unintended changes--keep them simple and small 28 | 29 | ### License 30 | By contributing your code, you agree to license your contribution under the terms of the APLv2: 31 | https://github.com/apache/parquet-format/blob/master/LICENSE 32 | -------------------------------------------------------------------------------- /dev/finalize-release: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | # 20 | 21 | set -e 22 | 23 | if [ -z "$3" ]; then 24 | cat <<EOF 25 | Usage: $0 <release-version> <rc-number> <new-development-version> 26 | Example: $0 2.7.0 0 2.8.0 27 | EOF 28 | exit 1 29 | fi 30 | 31 | release_version="$1" 32 | release_tag="apache-parquet-format-$release_version" 33 | rc_tag="$release_tag-rc$2" 34 | new_development_version="$3-SNAPSHOT" 35 | 36 | git tag -am "Release Apache Parquet Format $release_version" "$release_tag" "$rc_tag" 37 | mvn --batch-mode release:update-versions -DdevelopmentVersion="$new_development_version" 38 | git commit -am 'Prepare for next development iteration' 39 | 40 | echo 41 | echo "Verify the release tag and the current development version then push the changes by running: git push --follow-tags" 42 | -------------------------------------------------------------------------------- /dev/prepare-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | set -e 22 | 23 | [[ $# != 2 ]] && err="Incorrect number of arguments: $#" 24 | [[ -z $err ]] && ! [[ $1 =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] && err="Invalid release version: \"$1\"" 25 | [[ -z $err ]] && ! [[ $2 =~ ^[0-9]+$ ]] && err="Invalid rc number: \"$2\"" 26 | 27 | if [[ -n $err ]]; then 28 | cat <<EOF 29 | $err 30 | Usage: $0 <release-version> <rc-number> 31 | Example: $0 2.7.0 0 32 | EOF 33 | exit 1 34 | fi 35 | 36 | release_version="$1" 37 | new_development_version="$release_version-SNAPSHOT" 38 | 39 | tag="apache-parquet-format-$release_version-rc$2" 40 | 41 | mvn release:clean 42 | mvn release:prepare -Dtag="$tag" "-DreleaseVersion=$release_version" -DdevelopmentVersion="$new_development_version" 43 | 44 | echo "Finish staging binary artifacts by running: mvn release:perform" 45 | -------------------------------------------------------------------------------- /src/main/java/org/apache/parquet/format/event/FieldConsumer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied.
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package org.apache.parquet.format.event; 20 | 21 | import org.apache.thrift.TException; 22 | import org.apache.thrift.protocol.TProtocol; 23 | 24 | /** 25 | * To receive Thrift field events 26 | * 27 | * @author Julien Le Dem 28 | * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be 29 | * removed from here 30 | */ 31 | @Deprecated 32 | public interface FieldConsumer { 33 | 34 | /** 35 | * called by the EventBasedThriftReader when reading a field from a Struct 36 | * @param protocol the underlying protocol 37 | * @param eventBasedThriftReader the reader to delegate to further calls. 38 | * @param id the id of the field 39 | * @param type the type of the field 40 | * (the field value itself is consumed from the protocol; nothing is returned) 41 | * @throws TException 42 | */ 43 | public void consumeField(TProtocol protocol, EventBasedThriftReader eventBasedThriftReader, short id, byte type) throws TException; 44 | 45 | } -------------------------------------------------------------------------------- /logo/parquet-logos_1.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /dev/source-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License.
19 | # 20 | 21 | if [ -z "$1" ]; then 22 | echo "Usage: $0 <version> <rc-number>" 23 | exit 24 | fi 25 | 26 | if [ -z "$2" ]; then 27 | echo "Usage: $0 <version> <rc-number>" 28 | exit 29 | fi 30 | 31 | version=$1 32 | rc=$2 33 | 34 | if [ -d tmp/ ]; then 35 | echo "Cannot run: tmp/ exists" 36 | exit 37 | fi 38 | 39 | tag=apache-parquet-format-$version 40 | tagrc=${tag}-rc${rc} 41 | 42 | echo "Preparing source for $tagrc" 43 | 44 | release_hash=`git rev-list "$tagrc" 2> /dev/null | head -n 1 ` 45 | 46 | if [ -z "$release_hash" ]; then 47 | echo "Cannot continue: unknown git tag: $tagrc" 48 | exit 49 | fi 50 | 51 | echo "Using commit $release_hash" 52 | 53 | tarball=$tag.tar.gz 54 | 55 | # be conservative and use the release hash, even though git produces the same 56 | # archive (identical hashes) using the scm tag 57 | git archive $release_hash --prefix $tag/ -o $tarball 58 | 59 | # sign the archive 60 | gpg --armor --output ${tarball}.asc --detach-sig $tarball 61 | shasum -a 512 $tarball > ${tarball}.sha512 62 | 63 | # check out the parquet RC folder 64 | svn co --depth=empty https://dist.apache.org/repos/dist/dev/parquet tmp 65 | 66 | # add the release candidate for the tag 67 | mkdir -p tmp/$tagrc 68 | cp ${tarball}* tmp/$tagrc 69 | svn add tmp/$tagrc 70 | svn ci -m "Apache Parquet Format $version RC${rc}" tmp/$tagrc 71 | 72 | # clean up 73 | rm -rf tmp 74 | 75 | echo "Success! The release candidate is available here:" 76 | echo " https://dist.apache.org/repos/dist/dev/parquet/$tagrc" 77 | echo "" 78 | echo "Commit SHA1: $release_hash" 79 | 80 | -------------------------------------------------------------------------------- /changelog.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | GITHUB_REPO=Parquet/parquet-format 21 | OAUTH_FILE=~/.github_oauth_for_changelog 22 | if [ -f $OAUTH_FILE ] 23 | then 24 | token=`cat $OAUTH_FILE` 25 | else 26 | echo "Please create an oauth token here: https://github.com/settings/tokens/new" 27 | echo "Then paste it below (it will be saved in $OAUTH_FILE):" >&2 28 | read token >&2 29 | echo $token > $OAUTH_FILE 30 | chmod og-rwx $OAUTH_FILE 31 | fi 32 | TOKEN_HEADER="Authorization: token $token" 33 | 34 | curl -f -H "$TOKEN_HEADER" -s "https://api.github.com" > /dev/null 35 | if [ $?
== 0 ] 36 | then 37 | echo "login successful" >&2 38 | else 39 | echo "login failed" >&2 40 | curl -H "$TOKEN_HEADER" -s "https://api.github.com" 41 | echo "if your OAUTH token needs to be replaced you can delete file $OAUTH_FILE" 42 | exit 1 43 | fi 44 | 45 | echo "# Parquet #" 46 | 47 | git log | grep -E "Merge pull request|prepare release" | while read l 48 | do 49 | release=`echo $l | grep "\[maven-release-plugin\] prepare release" | cut -d "-" -f 5` 50 | PR=`echo $l| grep -E -o "Merge pull request #[^ ]*" | cut -d "#" -f 2` 51 | if [ -n "$release" ] 52 | then 53 | echo 54 | echo "### Version $release ###" 55 | fi 56 | if [ -n "$PR" ] 57 | then 58 | JSON=`curl -H "$TOKEN_HEADER" -s https://api.github.com/repos/${GITHUB_REPO}/pulls/$PR | tr "\n" " "` 59 | DESC_RAW=$(echo $JSON | grep -Eo '"title":.*?[^\\]",' | cut -d "\"" -f 4- | head -n 1 | sed -e "s/\\\\//g") 60 | DESC=$(echo ${DESC_RAW%\",}) 61 | echo "* ISSUE [$PR](https://github.com/${GITHUB_REPO}/pull/$PR): ${DESC}" 62 | fi 63 | done 64 | 65 | -------------------------------------------------------------------------------- /src/test/java/org/apache/parquet/format/TestUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package org.apache.parquet.format; 20 | 21 | import static java.util.Arrays.asList; 22 | import static junit.framework.Assert.assertEquals; 23 | import static junit.framework.Assert.assertNull; 24 | import static org.apache.parquet.format.Util.readFileMetaData; 25 | import static org.apache.parquet.format.Util.writeFileMetaData; 26 | 27 | import java.io.ByteArrayInputStream; 28 | import java.io.ByteArrayOutputStream; 29 | 30 | import org.junit.Test; 31 | 32 | import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer; 33 | /** 34 | * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be 35 | * removed from here 36 | */ 37 | @Deprecated 38 | public class TestUtil { 39 | 40 | @Test 41 | public void testReadFileMetadata() throws Exception { 42 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); 43 | FileMetaData md = new FileMetaData( 44 | 1, 45 | asList(new SchemaElement("foo")), 46 | 10, 47 | asList( 48 | new RowGroup( 49 | asList( 50 | new ColumnChunk(0), 51 | new ColumnChunk(1) 52 | ), 53 | 10, 54 | 5), 55 | new RowGroup( 56 | asList( 57 | new ColumnChunk(2), 58 | new ColumnChunk(3) 59 | ), 60 | 11, 61 | 5) 62 | ) 63 | ); 64 | writeFileMetaData(md , baos); 65 | FileMetaData md2 = readFileMetaData(in(baos)); 66 | FileMetaData md3 = new FileMetaData(); 67 | readFileMetaData(in(baos), new DefaultFileMetaDataConsumer(md3)); 68 | FileMetaData md4 = new FileMetaData(); 69 | readFileMetaData(in(baos), new DefaultFileMetaDataConsumer(md4), true); 70 | FileMetaData md5 = readFileMetaData(in(baos), true); 71 | FileMetaData md6 = readFileMetaData(in(baos), false); 72 | assertEquals(md, md2); 73 | assertEquals(md, md3); 74 | assertNull(md4.getRow_groups()); 75 | assertNull(md5.getRow_groups()); 76 | assertEquals(md4, md5); 77 | md4.setRow_groups(md.getRow_groups()); 78 | md5.setRow_groups(md.getRow_groups()); 79 | assertEquals(md, md4); 80 | assertEquals(md, md5); 81 | assertEquals(md4, md5); 82 | assertEquals(md, md6); 83 | } 84 | 85 | private ByteArrayInputStream in(ByteArrayOutputStream baos) { 86 | return new ByteArrayInputStream(baos.toByteArray()); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/org/apache/parquet/format/LogicalTypes.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.parquet.format; 21 | 22 | /** 23 | * Convenience instances of logical type classes. 
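 * <p>Illustrative usage only (not part of the original javadoc; the names below
 * are the constants and factory method defined in this class):
 * <pre>{@code
 * LogicalType str = LogicalTypes.UTF8;           // STRING logical type
 * LogicalType dec = LogicalTypes.DECIMAL(2, 18); // scale 2, precision 18
 * }</pre>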
24 | * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be 25 | * removed from here 26 | */ 27 | @Deprecated 28 | public class LogicalTypes { 29 | public static class TimeUnits { 30 | public static final TimeUnit MILLIS = TimeUnit.MILLIS(new MilliSeconds()); 31 | public static final TimeUnit MICROS = TimeUnit.MICROS(new MicroSeconds()); 32 | } 33 | 34 | public static LogicalType DECIMAL(int scale, int precision) { 35 | return LogicalType.DECIMAL(new DecimalType(scale, precision)); 36 | } 37 | 38 | public static final LogicalType UTF8 = LogicalType.STRING(new StringType()); 39 | public static final LogicalType MAP = LogicalType.MAP(new MapType()); 40 | public static final LogicalType LIST = LogicalType.LIST(new ListType()); 41 | public static final LogicalType ENUM = LogicalType.ENUM(new EnumType()); 42 | public static final LogicalType DATE = LogicalType.DATE(new DateType()); 43 | public static final LogicalType TIME_MILLIS = LogicalType.TIME(new TimeType(true, TimeUnits.MILLIS)); 44 | public static final LogicalType TIME_MICROS = LogicalType.TIME(new TimeType(true, TimeUnits.MICROS)); 45 | public static final LogicalType TIMESTAMP_MILLIS = LogicalType.TIMESTAMP(new TimestampType(true, TimeUnits.MILLIS)); 46 | public static final LogicalType TIMESTAMP_MICROS = LogicalType.TIMESTAMP(new TimestampType(true, TimeUnits.MICROS)); 47 | public static final LogicalType INT_8 = LogicalType.INTEGER(new IntType((byte) 8, true)); 48 | public static final LogicalType INT_16 = LogicalType.INTEGER(new IntType((byte) 16, true)); 49 | public static final LogicalType INT_32 = LogicalType.INTEGER(new IntType((byte) 32, true)); 50 | public static final LogicalType INT_64 = LogicalType.INTEGER(new IntType((byte) 64, true)); 51 | public static final LogicalType UINT_8 = LogicalType.INTEGER(new IntType((byte) 8, false)); 52 | public static final LogicalType UINT_16 = LogicalType.INTEGER(new IntType((byte) 16, false)); 53 | public static final LogicalType UINT_32 = LogicalType.INTEGER(new IntType((byte) 32, false)); 54 | public static final LogicalType UINT_64 = LogicalType.INTEGER(new IntType((byte) 64, false)); 55 | public static final LogicalType UNKNOWN = LogicalType.UNKNOWN(new NullType()); 56 | public static final LogicalType JSON = LogicalType.JSON(new JsonType()); 57 | public static final LogicalType BSON = LogicalType.BSON(new BsonType()); 58 | } 59 | -------------------------------------------------------------------------------- /dev/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Parquet Developer Scripts 21 | This directory contains scripts useful to developers when packaging, 22 | testing, or committing to Parquet. 23 | 24 | Merging a pull request requires being a committer on the project. 25 | 26 | * How to merge a Pull request: 27 | have an apache and apache-github remote setup 28 | ``` 29 | git remote add apache-github git@github.com:apache/incubator-parquet-format.git 30 | git remote add apache https://git-wip-us.apache.org/repos/asf/incubator-parquet-format.git 31 | ``` 32 | run the following command 33 | ``` 34 | dev/merge_parquet_pr.py 35 | ``` 36 | 37 | Note: 38 | * The parent directory of your parquet repository must be called parquet-format 39 | * Without jira-python installed you'll have to close the JIRA manually 40 | 41 | example output: 42 | ``` 43 | Which pull request would you like to merge? (e.g. 
34): 44 | ``` 45 | Type the pull request number (from https://github.com/apache/incubator-parquet-format/pulls) and hit enter. 46 | ``` 47 | === Pull Request #X === 48 | title Blah Blah Blah 49 | source repo/branch 50 | target master 51 | url https://api.github.com/repos/apache/incubator-parquet-format/pulls/X 52 | 53 | Proceed with merging pull request #3? (y/n): 54 | ``` 55 | If this looks good, type y and hit enter. 56 | ``` 57 | From git-wip-us.apache.org:/repos/asf/incubator-parquet-format.git 58 | * [new branch] master -> PR_TOOL_MERGE_PR_3_MASTER 59 | Switched to branch 'PR_TOOL_MERGE_PR_3_MASTER' 60 | 61 | Merge complete (local ref PR_TOOL_MERGE_PR_3_MASTER). Push to apache? (y/n): 62 | ``` 63 | A local branch with the merge has been created. 64 | type y and hit enter to push it to apache master 65 | ``` 66 | Counting objects: 67, done. 67 | Delta compression using up to 4 threads. 68 | Compressing objects: 100% (26/26), done. 69 | Writing objects: 100% (36/36), 5.32 KiB, done. 70 | Total 36 (delta 17), reused 0 (delta 0) 71 | To git-wip-us.apache.org:/repos/asf/incubator-parquet-format.git 72 | b767ac4..485658a PR_TOOL_MERGE_PR_X_MASTER -> master 73 | Restoring head pointer to b767ac4e 74 | Note: checking out 'b767ac4e'. 75 | 76 | You are in 'detached HEAD' state. You can look around, make experimental 77 | changes and commit them, and you can discard any commits you make in this 78 | state without impacting any branches by performing another checkout. 79 | 80 | If you want to create a new branch to retain commits you create, you may 81 | do so (now or later) by using -b with the checkout command again. Example: 82 | 83 | git checkout -b new_branch_name 84 | 85 | HEAD is now at b767ac4... Update README.md 86 | Deleting local branch PR_TOOL_MERGE_PR_X 87 | Deleting local branch PR_TOOL_MERGE_PR_X_MASTER 88 | Pull request #X merged! 89 | Merge hash: 485658a5 90 | 91 | Would you like to pick 485658a5 into another branch? (y/n): 92 | ``` 93 | For now just say n as we have 1 branch 94 | -------------------------------------------------------------------------------- /src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package org.apache.parquet.format.event; 20 | 21 | import org.apache.thrift.TException; 22 | import org.apache.thrift.protocol.TField; 23 | import org.apache.thrift.protocol.TList; 24 | import org.apache.thrift.protocol.TMap; 25 | import org.apache.thrift.protocol.TProtocol; 26 | import org.apache.thrift.protocol.TSet; 27 | import org.apache.thrift.protocol.TType; 28 | 29 | import org.apache.parquet.format.event.TypedConsumer.ListConsumer; 30 | import org.apache.parquet.format.event.TypedConsumer.MapConsumer; 31 | import org.apache.parquet.format.event.TypedConsumer.SetConsumer; 32 | 33 | /** 34 | * Event based reader for Thrift 35 | * 36 | * @author Julien Le Dem 37 | * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be 38 | * removed from here 39 | */ 40 | @Deprecated 41 | public final class EventBasedThriftReader { 42 | 43 | private final TProtocol protocol; 44 | 45 | /** 46 | * @param protocol the protocol to read from 47 | */ 48 | public EventBasedThriftReader(TProtocol protocol) { 49 | this.protocol = protocol; 50 | } 51 | 52 | /** 53 | * reads a Struct from the underlying protocol and passes the field events to the FieldConsumer 54 | * @param c the field consumer 55 | * @throws TException 56 | */ 57 | public void readStruct(FieldConsumer c) throws TException { 58 | protocol.readStructBegin(); 59 | readStructContent(c); 60 | protocol.readStructEnd(); 61 | } 62 | 63 | /** 64 | * reads the content of a struct (fields) from the underlying protocol and passes the events to c 65 | * @param c the field consumer 66 | * @throws TException 67 | */ 68 | public void readStructContent(FieldConsumer c) throws TException { 69 | TField field; 70 | while (true) { 71 | field = protocol.readFieldBegin(); 72 | if (field.type == TType.STOP) { 73 | break; 74 | } 75 | c.consumeField(protocol, this, field.id, field.type); 76 | } 77 | } 78 | 79 | /** 80 | * reads the set content (elements) from the underlying protocol and passes the events to the set event consumer 81 | * @param eventConsumer the consumer 82 | * @param tSet the set descriptor 83 | * @throws TException 84 | */ 85 | public void readSetContent(SetConsumer eventConsumer, TSet tSet) 86 | throws TException { 87 | for (int i = 0; i < tSet.size; i++) { 88 | eventConsumer.consumeElement(protocol, this, tSet.elemType); 89 | } 90 | } 91 | 92 | /** 93 | * reads the map content (key values) from the underlying protocol and passes the events to the map event consumer 94 | * @param eventConsumer the consumer 95 | * @param tMap the map descriptor 96 | * @throws TException 97 | */ 98 | public void readMapContent(MapConsumer eventConsumer, TMap tMap) 99 | throws TException { 100 | for (int i = 0; i < tMap.size; i++) { 101 | eventConsumer.consumeEntry(protocol, this, tMap.keyType, tMap.valueType); 102 | } 103 | } 104 | 105 | /** 106 | * reads a key-value pair 107 | * @param keyType the type of the key 108 | * @param keyConsumer the consumer for the key 109 | * @param valueType the type of the value 110 | * @param valueConsumer the consumer for the value 111 | * @throws TException 112 | */ 113 | public void readMapEntry(byte keyType, TypedConsumer keyConsumer, byte valueType, TypedConsumer valueConsumer) 114 | throws TException { 115 | keyConsumer.read(protocol, this, keyType); 116 | valueConsumer.read(protocol, this, valueType); 117 | } 118 | 119 | /** 120 | * reads the list content (elements) from the underlying protocol and passes the events to the list event consumer 121 | * @param 
eventConsumer the consumer 122 | * @param tList the list descriptor 123 | * @throws TException 124 | */ 125 | public void readListContent(ListConsumer eventConsumer, TList tList) 126 | throws TException { 127 | for (int i = 0; i < tList.size; i++) { 128 | eventConsumer.consumeElement(protocol, this, tList.elemType); 129 | } 130 | } 131 | } -------------------------------------------------------------------------------- /PageIndex.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # ColumnIndex Layout to Support Page Skipping 21 | 22 | This document describes the format for column index pages in the Parquet 23 | footer. These pages contain statistics for DataPages and can be used to skip 24 | pages when scanning data in ordered and unordered columns. 25 | 26 | ## Problem Statement 27 | In previous versions of the format, Statistics are stored for ColumnChunks in 28 | ColumnMetaData and for individual pages inside DataPageHeader structs. When 29 | reading pages, a reader had to process the page header to determine 30 | whether the page could be skipped based on the statistics. This means the reader 31 | had to access all pages in a column, thus likely reading most of the column 32 | data from disk. 33 | 34 | ## Goals 35 | 1. Make both range scans and point lookups I/O efficient by allowing direct 36 | access to pages based on their min and max values. In particular: 37 | * A single-row lookup in a row group based on the sort column of that row group 38 | will only read one data page per retrieved column. 39 | * Range scans on the sort column will only need to read the exact data 40 | pages that contain relevant data. 41 | * Make other selective scans I/O efficient: if we have a very selective 42 | predicate on a non-sorting column, for the other retrieved columns we 43 | should only need to access data pages that contain matching rows. 44 | 2. No additional decoding effort for scans without selective predicates, e.g., 45 | full-row group scans. If a reader determines that it does not need to read 46 | the index data, it does not incur any overhead. 47 | 3. Index pages for sorted columns use minimal storage by storing only the 48 | boundary elements between pages. 49 | 50 | ## Non-Goals 51 | * Support for the equivalent of secondary indices, i.e., an index structure 52 | sorted on the key values over non-sorted data. 53 | 54 | 55 | ## Technical Approach 56 | 57 | We add two new per-column structures to the row group metadata: 58 | * ColumnIndex: this allows navigation to the pages of a column based on column 59 | values and is used to locate data pages that contain matching values for a 60 | scan predicate 61 | * OffsetIndex: this allows navigation by row index and is used to retrieve 62 | values for rows identified as matches via the ColumnIndex. Once rows of a 63 | column are skipped, the corresponding rows in the other columns have to be 64 | skipped. Hence the OffsetIndexes for each column in a RowGroup are stored 65 | together. 66 | 67 | The new index structures are stored separately from RowGroup, near the footer. 68 | This is done so that a reader does not have to pay the I/O and deserialization 69 | cost for reading them if it is not doing selective scans. The index structures' 70 | location and length are stored in ColumnChunk.
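As an illustration only (this sketch is not part of the format specification), a reader evaluating a predicate `lower <= v <= upper` could use the ColumnIndex to keep just the pages whose value range overlaps the predicate. The `minValues`/`maxValues` lists stand in for the `min_values`/`max_values` fields described below, and the comparator is assumed to implement the column's sort order:

```java
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

class PageSkippingSketch {
  // Returns the indices of pages whose [min, max] range may contain a value in
  // [lower, upper]; every other page can be skipped without being read.
  static List<Integer> candidatePages(List<byte[]> minValues, List<byte[]> maxValues,
                                      byte[] lower, byte[] upper,
                                      Comparator<byte[]> order) {
    List<Integer> candidates = new ArrayList<Integer>();
    for (int i = 0; i < minValues.size(); i++) {
      // page i overlaps [lower, upper] iff max_values[i] >= lower and min_values[i] <= upper
      if (order.compare(maxValues.get(i), lower) >= 0
          && order.compare(minValues.get(i), upper) <= 0) {
        candidates.add(i);
      }
    }
    return candidates;
  }
}
```

The resulting page indices can then be translated to file offsets, and to the matching row ranges of the other columns, through the OffsetIndex.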
71 | 72 | ![Page Index Layout](doc/images/PageIndexLayout.png) 73 | 74 | Some observations: 75 | * We don't need to record the lower bound for the first page and the upper 76 | bound for the last page, because the row group Statistics can provide that. 77 | We still include those for the sake of uniformity, and the overhead should be 78 | negligible. 79 | * We store lower and upper bounds for the values of each page. These may be the 80 | actual minimum and maximum values found on a page, but can also be (more 81 | compact) values that do not exist on a page. For example, instead of storing 82 | "Blart Versenwald III", a writer may set `min_values[i]="B"`, 83 | `max_values[i]="C"`. This allows writers to truncate large values and writers 84 | should use this to enforce some reasonable bound on the size of the index 85 | structures. 86 | * Readers that support ColumnIndex should not also use page statistics. The 87 | only reason to write page-level statistics when writing ColumnIndex structs 88 | is to support older readers (not recommended). 89 | 90 | For ordered columns, this allows a reader to find matching pages by performing 91 | a binary search in `min_values` and `max_values`. For unordered columns, a 92 | reader can find matching pages by sequentially reading `min_values` and 93 | `max_values`. 94 | 95 | For range scans, this approach can be extended to return ranges of rows, page 96 | indices, and page offsets to scan in each column. The reader can then 97 | initialize a scanner for each column and fast forward them to the start row of 98 | the scan. 99 | -------------------------------------------------------------------------------- /src/main/java/org/apache/parquet/format/InterningProtocol.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.parquet.format; 21 | 22 | import java.nio.ByteBuffer; 23 | 24 | import org.apache.thrift.TException; 25 | import org.apache.thrift.protocol.TField; 26 | import org.apache.thrift.protocol.TList; 27 | import org.apache.thrift.protocol.TMap; 28 | import org.apache.thrift.protocol.TMessage; 29 | import org.apache.thrift.protocol.TProtocol; 30 | import org.apache.thrift.protocol.TSet; 31 | import org.apache.thrift.protocol.TStruct; 32 | import org.apache.thrift.transport.TTransport; 33 | 34 | /** 35 | * TProtocol that interns the strings.
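 * Interning collapses repeated metadata strings (such as recurring column path
 * elements) into a single heap instance. Illustrative use only; the concrete
 * protocol and transport below are assumed context, not part of this file:
 * <pre>{@code
 * TProtocol proto = new InterningProtocol(new TCompactProtocol(transport));
 * FileMetaData md = new FileMetaData();
 * md.read(proto); // strings are interned as they are read
 * }</pre>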
36 | * 37 | * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be 38 | * removed from here 39 | */ 40 | @Deprecated 41 | public class InterningProtocol extends TProtocol { 42 | 43 | private final TProtocol delegate; 44 | 45 | public InterningProtocol(TProtocol delegate) { 46 | super(delegate.getTransport()); 47 | this.delegate = delegate; 48 | } 49 | 50 | public TTransport getTransport() { 51 | return delegate.getTransport(); 52 | } 53 | 54 | public void writeMessageBegin(TMessage message) throws TException { 55 | delegate.writeMessageBegin(message); 56 | } 57 | 58 | public void writeMessageEnd() throws TException { 59 | delegate.writeMessageEnd(); 60 | } 61 | 62 | public int hashCode() { 63 | return delegate.hashCode(); 64 | } 65 | 66 | public void writeStructBegin(TStruct struct) throws TException { 67 | delegate.writeStructBegin(struct); 68 | } 69 | 70 | public void writeStructEnd() throws TException { 71 | delegate.writeStructEnd(); 72 | } 73 | 74 | public void writeFieldBegin(TField field) throws TException { 75 | delegate.writeFieldBegin(field); 76 | } 77 | 78 | public void writeFieldEnd() throws TException { 79 | delegate.writeFieldEnd(); 80 | } 81 | 82 | public void writeFieldStop() throws TException { 83 | delegate.writeFieldStop(); 84 | } 85 | 86 | public void writeMapBegin(TMap map) throws TException { 87 | delegate.writeMapBegin(map); 88 | } 89 | 90 | public void writeMapEnd() throws TException { 91 | delegate.writeMapEnd(); 92 | } 93 | 94 | public void writeListBegin(TList list) throws TException { 95 | delegate.writeListBegin(list); 96 | } 97 | 98 | public void writeListEnd() throws TException { 99 | delegate.writeListEnd(); 100 | } 101 | 102 | public void writeSetBegin(TSet set) throws TException { 103 | delegate.writeSetBegin(set); 104 | } 105 | 106 | public void writeSetEnd() throws TException { 107 | delegate.writeSetEnd(); 108 | } 109 | 110 | public void writeBool(boolean b) throws TException { 111 | delegate.writeBool(b); 112 | } 113 | 114 | public void writeByte(byte b) throws TException { 115 | delegate.writeByte(b); 116 | } 117 | 118 | public void writeI16(short i16) throws TException { 119 | delegate.writeI16(i16); 120 | } 121 | 122 | public void writeI32(int i32) throws TException { 123 | delegate.writeI32(i32); 124 | } 125 | 126 | public void writeI64(long i64) throws TException { 127 | delegate.writeI64(i64); 128 | } 129 | 130 | public void writeDouble(double dub) throws TException { 131 | delegate.writeDouble(dub); 132 | } 133 | 134 | public void writeString(String str) throws TException { 135 | delegate.writeString(str); 136 | } 137 | 138 | public void writeBinary(ByteBuffer buf) throws TException { 139 | delegate.writeBinary(buf); 140 | } 141 | 142 | public TMessage readMessageBegin() throws TException { 143 | return delegate.readMessageBegin(); 144 | } 145 | 146 | public void readMessageEnd() throws TException { 147 | delegate.readMessageEnd(); 148 | } 149 | 150 | public TStruct readStructBegin() throws TException { 151 | return delegate.readStructBegin(); 152 | } 153 | 154 | public void readStructEnd() throws TException { 155 | delegate.readStructEnd(); 156 | } 157 | 158 | public TField readFieldBegin() throws TException { 159 | return delegate.readFieldBegin(); 160 | } 161 | 162 | public void readFieldEnd() throws TException { 163 | delegate.readFieldEnd(); 164 | } 165 | 166 | public TMap readMapBegin() throws TException { 167 | return delegate.readMapBegin(); 168 | } 169 | 170 | public void 
readMapEnd() throws TException { 171 | delegate.readMapEnd(); 172 | } 173 | 174 | public TList readListBegin() throws TException { 175 | return delegate.readListBegin(); 176 | } 177 | 178 | public void readListEnd() throws TException { 179 | delegate.readListEnd(); 180 | } 181 | 182 | public TSet readSetBegin() throws TException { 183 | return delegate.readSetBegin(); 184 | } 185 | 186 | public void readSetEnd() throws TException { 187 | delegate.readSetEnd(); 188 | } 189 | 190 | public boolean equals(Object obj) { 191 | return delegate.equals(obj); 192 | } 193 | 194 | public boolean readBool() throws TException { 195 | return delegate.readBool(); 196 | } 197 | 198 | public byte readByte() throws TException { 199 | return delegate.readByte(); 200 | } 201 | 202 | public short readI16() throws TException { 203 | return delegate.readI16(); 204 | } 205 | 206 | public int readI32() throws TException { 207 | return delegate.readI32(); 208 | } 209 | 210 | public long readI64() throws TException { 211 | return delegate.readI64(); 212 | } 213 | 214 | public double readDouble() throws TException { 215 | return delegate.readDouble(); 216 | } 217 | 218 | public String readString() throws TException { 219 | // this is where we intern the strings 220 | return delegate.readString().intern(); 221 | } 222 | 223 | public ByteBuffer readBinary() throws TException { 224 | return delegate.readBinary(); 225 | } 226 | 227 | public void reset() { 228 | delegate.reset(); 229 | } 230 | 231 | public String toString() { 232 | return delegate.toString(); 233 | } 234 | 235 | } 236 | -------------------------------------------------------------------------------- /logo/parquet-logos_2.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /logo/parquet-logos_3.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/main/java/org/apache/parquet/format/event/Consumers.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package org.apache.parquet.format.event; 20 | 21 | import static java.util.Collections.unmodifiableMap; 22 | 23 | import java.util.ArrayList; 24 | import java.util.Collections; 25 | import java.util.HashMap; 26 | import java.util.List; 27 | import java.util.Map; 28 | 29 | import org.apache.thrift.TBase; 30 | import org.apache.thrift.TException; 31 | import org.apache.thrift.TFieldIdEnum; 32 | import org.apache.thrift.protocol.TList; 33 | import org.apache.thrift.protocol.TProtocol; 34 | import org.apache.thrift.protocol.TProtocolUtil; 35 | 36 | import org.apache.parquet.format.event.Consumers.Consumer; 37 | import org.apache.parquet.format.event.TypedConsumer.BoolConsumer; 38 | import org.apache.parquet.format.event.TypedConsumer.ListConsumer; 39 | import org.apache.parquet.format.event.TypedConsumer.StructConsumer; 40 | 41 | /** 42 | * Entry point for reading thrift in a streaming fashion 43 | * 44 | * @author Julien Le Dem 45 | * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be 46 | * removed from here 47 | */ 48 | @Deprecated 49 | public class Consumers { 50 | 51 | /** 52 | * To consume objects coming from a DelegatingFieldConsumer 53 | * @author Julien Le Dem 54 | * 55 | * @param <T> the type of consumed objects 56 | */ 57 | public static interface Consumer<T> { 58 | void consume(T t); 59 | } 60 | 61 | /** 62 | * Delegates reading the field to TypedConsumers. 63 | * There is one TypedConsumer per thrift type. 64 | * use {@link DelegatingFieldConsumer#onField(TFieldIdEnum, BoolConsumer)} et al. to consume specific thrift fields. 65 | * @see Consumers#fieldConsumer() 66 | * @author Julien Le Dem 67 | * 68 | */ 69 | public static class DelegatingFieldConsumer implements FieldConsumer { 70 | 71 | private final Map<Short, TypedConsumer> contexts; 72 | private final FieldConsumer defaultFieldEventConsumer; 73 | 74 | private DelegatingFieldConsumer(FieldConsumer defaultFieldEventConsumer, Map<Short, TypedConsumer> contexts) { 75 | this.defaultFieldEventConsumer = defaultFieldEventConsumer; 76 | this.contexts = unmodifiableMap(contexts); 77 | } 78 | 79 | private DelegatingFieldConsumer() { 80 | this(new SkippingFieldConsumer()); 81 | } 82 | 83 | private DelegatingFieldConsumer(FieldConsumer defaultFieldEventConsumer) { 84 | this(defaultFieldEventConsumer, Collections.<Short, TypedConsumer>emptyMap()); 85 | } 86 | 87 | public DelegatingFieldConsumer onField(TFieldIdEnum e, TypedConsumer typedConsumer) { 88 | Map<Short, TypedConsumer> newContexts = new HashMap<Short, TypedConsumer>(contexts); 89 | newContexts.put(e.getThriftFieldId(), typedConsumer); 90 | return new DelegatingFieldConsumer(defaultFieldEventConsumer, newContexts); 91 | } 92 | 93 | @Override 94 | public void consumeField( 95 | TProtocol protocol, EventBasedThriftReader reader, 96 | short id, byte type) throws TException { 97 | TypedConsumer delegate = contexts.get(id); 98 | if (delegate != null) { 99 | delegate.read(protocol, reader, type); 100 | } else { 101 | defaultFieldEventConsumer.consumeField(protocol, reader, id, type); 102 | } 103 | } 104 | } 105 | 106 | /** 107 | * call onField on the resulting DelegatingFieldConsumer to handle individual fields 108 | * @return a new DelegatingFieldConsumer 109 | */ 110 | public static DelegatingFieldConsumer fieldConsumer() { 111 | return new DelegatingFieldConsumer(); 112 | } 113 | 114 | /** 115 | * To consume a list of elements 116 | * @param c the type of the list content 117 | * @param consumer the consumer that will receive the list 118 | * @return a ListConsumer that can be passed to the DelegatingFieldConsumer 119 | */
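  // Illustrative sketch, not in the original source: wiring the pieces in this
  // file to stream a single i64 field of a struct. FileMetaData._Fields is the
  // Thrift-generated field enum; "protocol" is an assumed TProtocol instance.
  //
  //   new EventBasedThriftReader(protocol).readStruct(
  //       Consumers.fieldConsumer().onField(
  //           FileMetaData._Fields.NUM_ROWS,
  //           new TypedConsumer.I64Consumer() {
  //             @Override
  //             public void consume(long numRows) {
  //               // use numRows without materializing the whole FileMetaData
  //             }
  //           }));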
120 | public static <T extends TBase<?,?>> ListConsumer listOf(Class<T> c, final Consumer<List<T>> consumer) { 121 | class ListConsumer implements Consumer<T> { 122 | List<T> list; 123 | @Override 124 | public void consume(T t) { 125 | list.add(t); 126 | } 127 | } 128 | final ListConsumer co = new ListConsumer(); 129 | return new DelegatingListElementsConsumer(struct(c, co)) { 130 | @Override 131 | public void consumeList(TProtocol protocol, 132 | EventBasedThriftReader reader, TList tList) throws TException { 133 | co.list = new ArrayList<T>(); 134 | super.consumeList(protocol, reader, tList); 135 | consumer.consume(co.list); 136 | } 137 | }; 138 | } 139 | 140 | /** 141 | * To consume list elements one by one 142 | * @param consumer the consumer that will read the elements 143 | * @return a ListConsumer that can be passed to the DelegatingFieldConsumer 144 | */ 145 | public static ListConsumer listElementsOf(TypedConsumer consumer) { 146 | return new DelegatingListElementsConsumer(consumer); 147 | } 148 | 149 | public static <T extends TBase<?,?>> StructConsumer struct(final Class<T> c, final Consumer<T> consumer) { 150 | return new TBaseStructConsumer<T>(c, consumer); 151 | } 152 | } 153 | 154 | class SkippingFieldConsumer implements FieldConsumer { 155 | @Override 156 | public void consumeField(TProtocol protocol, EventBasedThriftReader reader, short id, byte type) throws TException { 157 | TProtocolUtil.skip(protocol, type); 158 | } 159 | } 160 | 161 | class DelegatingListElementsConsumer extends ListConsumer { 162 | 163 | private TypedConsumer elementConsumer; 164 | 165 | protected DelegatingListElementsConsumer(TypedConsumer consumer) { 166 | this.elementConsumer = consumer; 167 | } 168 | 169 | @Override 170 | public void consumeElement(TProtocol protocol, EventBasedThriftReader reader, byte elemType) throws TException { 171 | elementConsumer.read(protocol, reader, elemType); 172 | } 173 | } 174 | class TBaseStructConsumer<T extends TBase<?,?>> extends StructConsumer { 175 | 176 | private final Class<T> c; 177 | private Consumer<T> consumer; 178 | 179 | public TBaseStructConsumer(Class<T> c, Consumer<T> consumer) { 180 | this.c = c; 181 | this.consumer = consumer; 182 | } 183 | 184 | @Override 185 | public void consumeStruct(TProtocol protocol, EventBasedThriftReader reader) throws TException { 186 | T o = newObject(); 187 | o.read(protocol); 188 | consumer.consume(o); 189 | } 190 | 191 | protected T newObject() { 192 | try { 193 | return c.newInstance(); 194 | } catch (InstantiationException e) { 195 | throw new RuntimeException(c.getName(), e); 196 | } catch (IllegalAccessException e) { 197 | throw new RuntimeException(c.getName(), e); 198 | } 199 | } 200 | 201 | } -------------------------------------------------------------------------------- /src/main/java/org/apache/parquet/format/event/TypedConsumer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
9 |  *
10 |  * http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied. See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | package org.apache.parquet.format.event;
20 |
21 | import static org.apache.thrift.protocol.TType.BOOL;
22 | import static org.apache.thrift.protocol.TType.BYTE;
23 | import static org.apache.thrift.protocol.TType.DOUBLE;
24 | import static org.apache.thrift.protocol.TType.I16;
25 | import static org.apache.thrift.protocol.TType.I32;
26 | import static org.apache.thrift.protocol.TType.I64;
27 | import static org.apache.thrift.protocol.TType.LIST;
28 | import static org.apache.thrift.protocol.TType.MAP;
29 | import static org.apache.thrift.protocol.TType.SET;
30 | import static org.apache.thrift.protocol.TType.STRING;
31 | import static org.apache.thrift.protocol.TType.STRUCT;
32 |
33 | import org.apache.thrift.TException;
34 | import org.apache.thrift.protocol.TList;
35 | import org.apache.thrift.protocol.TMap;
36 | import org.apache.thrift.protocol.TProtocol;
37 | import org.apache.thrift.protocol.TSet;
38 |
39 | /**
40 |  * receive thrift events of a given type
41 |  *
42 |  * @author Julien Le Dem
43 |  * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be
44 |  *             removed from here
45 |  */
46 | @Deprecated
47 | abstract public class TypedConsumer {
48 |
49 |   abstract public static class DoubleConsumer extends TypedConsumer {
50 |     protected DoubleConsumer() { super(DOUBLE); }
51 |     @Override
52 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
53 |       this.consume(protocol.readDouble());
54 |     }
55 |     abstract public void consume(double value);
56 |   }
57 |
58 |   abstract public static class ByteConsumer extends TypedConsumer {
59 |     protected ByteConsumer() { super(BYTE); }
60 |     @Override
61 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
62 |       this.consume(protocol.readByte());
63 |     }
64 |     abstract public void consume(byte value);
65 |   }
66 |
67 |   abstract public static class BoolConsumer extends TypedConsumer {
68 |     protected BoolConsumer() { super(BOOL); }
69 |     @Override
70 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
71 |       this.consume(protocol.readBool());
72 |     }
73 |     abstract public void consume(boolean value);
74 |   }
75 |
76 |   abstract public static class I32Consumer extends TypedConsumer {
77 |     protected I32Consumer() { super(I32); }
78 |     @Override
79 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
80 |       this.consume(protocol.readI32());
81 |     }
82 |     abstract public void consume(int value);
83 |   }
84 |
85 |   abstract public static class I64Consumer extends TypedConsumer {
86 |     protected I64Consumer() { super(I64); }
87 |     @Override final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
88 |       this.consume(protocol.readI64());
89 |     }
90 |     abstract public void consume(long value);
91 |   }
92 |
93 |   abstract public static class I16Consumer extends TypedConsumer {
94 |     protected I16Consumer() { super(I16); }
95 |     @Override
96 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws
TException {
97 |       this.consume(protocol.readI16());
98 |     }
99 |     abstract public void consume(short value);
100 |   }
101 |
102 |   abstract public static class StringConsumer extends TypedConsumer {
103 |     protected StringConsumer() { super(STRING); }
104 |     @Override
105 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
106 |       this.consume(protocol.readString());
107 |     }
108 |     abstract public void consume(String value);
109 |   }
110 |
111 |   abstract public static class StructConsumer extends TypedConsumer {
112 |     protected StructConsumer() { super(STRUCT); }
113 |     @Override
114 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
115 |       this.consumeStruct(protocol, reader);
116 |     }
117 |     /**
118 |      * can either delegate to the reader or read the struct from the protocol:
119 |      * reader.readStruct(fieldConsumer);
120 |      * @param protocol the underlying protocol
121 |      * @param reader the reader to delegate to
122 |      * @throws TException
123 |      */
124 |     abstract public void consumeStruct(TProtocol protocol, EventBasedThriftReader reader) throws TException;
125 |   }
126 |
127 |   abstract public static class ListConsumer extends TypedConsumer {
128 |     protected ListConsumer() { super(LIST); }
129 |     @Override
130 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
131 |       this.consumeList(protocol, reader, protocol.readListBegin());
132 |       protocol.readListEnd();
133 |     }
134 |     public void consumeList(TProtocol protocol, EventBasedThriftReader reader, TList tList) throws TException {
135 |       reader.readListContent(this, tList);
136 |     }
137 |     /**
138 |      * can either delegate to the reader or read the element from the protocol
139 |      * @param protocol the underlying protocol
140 |      * @param reader the reader to delegate to
141 |      * @throws TException
142 |      */
143 |     abstract public void consumeElement(TProtocol protocol, EventBasedThriftReader reader, byte elemType) throws TException;
144 |   }
145 |
146 |   abstract public static class SetConsumer extends TypedConsumer {
147 |     protected SetConsumer() { super(SET); }
148 |     @Override
149 |     final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
150 |       this.consumeSet(protocol, reader, protocol.readSetBegin());
151 |       protocol.readSetEnd();
152 |     }
153 |     public void consumeSet(TProtocol protocol, EventBasedThriftReader reader, TSet tSet) throws TException {
154 |       reader.readSetContent(this, tSet);
155 |     }
156 |     /**
157 |      * can either delegate to the reader or read the set from the protocol
158 |      * @param protocol the underlying protocol
159 |      * @param reader the reader to delegate to
160 |      * @throws TException
161 |      */
162 |     abstract public void consumeElement(
163 |         TProtocol protocol, EventBasedThriftReader reader,
164 |         byte elemType) throws TException;
165 |   }
166 |
167 |   abstract public static class MapConsumer extends TypedConsumer {
168 |     protected MapConsumer() { super(MAP); }
169 |     @Override
170 |     final void read(TProtocol protocol, EventBasedThriftReader reader)
171 |         throws TException {
172 |       this.consumeMap(protocol, reader, protocol.readMapBegin());
173 |       protocol.readMapEnd();
174 |     }
175 |     public void consumeMap(TProtocol protocol, EventBasedThriftReader reader, TMap tMap) throws TException {
176 |       reader.readMapContent(this, tMap);
177 |     }
178 |     /**
179 |      * can either delegate to the reader or read the map entry from the protocol
180 |      * @param protocol the underlying protocol
181 |      * @param reader the reader to delegate to
182 | * @throws TException 183 | */ 184 | abstract public void consumeEntry( 185 | TProtocol protocol, EventBasedThriftReader reader, 186 | byte keyType, byte valueType) throws TException; 187 | } 188 | 189 | public final byte type; 190 | 191 | private TypedConsumer(byte type) { 192 | this.type = type; 193 | } 194 | 195 | final public void read(TProtocol protocol, EventBasedThriftReader reader, byte type) throws TException { 196 | if (this.type != type) { 197 | throw new TException( 198 | "Incorrect type in stream. " 199 | + "Expected " + this.type 200 | + " but got " + type); 201 | } 202 | this.read(protocol, reader); 203 | } 204 | 205 | abstract void read(TProtocol protocol, EventBasedThriftReader reader) throws TException; 206 | } -------------------------------------------------------------------------------- /src/main/java/org/apache/parquet/format/Util.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.parquet.format; 21 | 22 | import static org.apache.parquet.format.FileMetaData._Fields.CREATED_BY; 23 | import static org.apache.parquet.format.FileMetaData._Fields.KEY_VALUE_METADATA; 24 | import static org.apache.parquet.format.FileMetaData._Fields.NUM_ROWS; 25 | import static org.apache.parquet.format.FileMetaData._Fields.ROW_GROUPS; 26 | import static org.apache.parquet.format.FileMetaData._Fields.SCHEMA; 27 | import static org.apache.parquet.format.FileMetaData._Fields.VERSION; 28 | import static org.apache.parquet.format.event.Consumers.fieldConsumer; 29 | import static org.apache.parquet.format.event.Consumers.listElementsOf; 30 | import static org.apache.parquet.format.event.Consumers.listOf; 31 | import static org.apache.parquet.format.event.Consumers.struct; 32 | 33 | import java.io.IOException; 34 | import java.io.InputStream; 35 | import java.io.OutputStream; 36 | import java.util.List; 37 | 38 | import org.apache.thrift.TBase; 39 | import org.apache.thrift.TException; 40 | import org.apache.thrift.protocol.TCompactProtocol; 41 | import org.apache.thrift.protocol.TProtocol; 42 | import org.apache.thrift.transport.TIOStreamTransport; 43 | 44 | import org.apache.parquet.format.event.Consumers.Consumer; 45 | import org.apache.parquet.format.event.Consumers.DelegatingFieldConsumer; 46 | import org.apache.parquet.format.event.EventBasedThriftReader; 47 | import org.apache.parquet.format.event.TypedConsumer.I32Consumer; 48 | import org.apache.parquet.format.event.TypedConsumer.I64Consumer; 49 | import org.apache.parquet.format.event.TypedConsumer.StringConsumer; 50 | 51 | /** 52 | * Utility to read/write metadata 53 | * We use the TCompactProtocol to serialize metadata 54 | * 55 | * @deprecated java code moved to the parquet-mr project: See org.apache.parquet:parquet-format-structures; Will be 56 | * removed from here 57 | */ 58 | @Deprecated 59 | public class Util { 60 | 61 | public static void writeColumnIndex(ColumnIndex columnIndex, OutputStream to) throws IOException { 62 | write(columnIndex, to); 63 | } 64 | 65 | public static ColumnIndex readColumnIndex(InputStream from) throws IOException { 66 | return read(from, new ColumnIndex()); 67 | } 68 | 69 | public static void writeOffsetIndex(OffsetIndex offsetIndex, OutputStream to) throws IOException { 70 | write(offsetIndex, to); 71 | } 72 | 73 | public static OffsetIndex readOffsetIndex(InputStream from) throws IOException { 74 | return read(from, new OffsetIndex()); 75 | } 76 | 77 | public static void writePageHeader(PageHeader pageHeader, OutputStream to) throws IOException { 78 | write(pageHeader, to); 79 | } 80 | 81 | public static PageHeader readPageHeader(InputStream from) throws IOException { 82 | return read(from, new PageHeader()); 83 | } 84 | 85 | public static void writeFileMetaData(org.apache.parquet.format.FileMetaData fileMetadata, OutputStream to) throws IOException { 86 | write(fileMetadata, to); 87 | } 88 | 89 | public static FileMetaData readFileMetaData(InputStream from) throws IOException { 90 | return read(from, new FileMetaData()); 91 | } 92 | /** 93 | * reads the meta data from the stream 94 | * @param from the stream to read the metadata from 95 | * @param skipRowGroups whether row groups should be skipped 96 | * @return the resulting metadata 97 | * @throws IOException 98 | */ 99 | public static FileMetaData readFileMetaData(InputStream from, boolean skipRowGroups) throws IOException { 100 | FileMetaData md = new FileMetaData(); 101 | if (skipRowGroups) { 102 | 
readFileMetaData(from, new DefaultFileMetaDataConsumer(md), skipRowGroups);
103 |     } else {
104 |       read(from, md);
105 |     }
106 |     return md;
107 |   }
108 |
109 |   /**
110 |    * To read metadata in a streaming fashion.
111 |    */
112 |   public static abstract class FileMetaDataConsumer {
113 |     abstract public void setVersion(int version);
114 |     abstract public void setSchema(List<SchemaElement> schema);
115 |     abstract public void setNumRows(long numRows);
116 |     abstract public void addRowGroup(RowGroup rowGroup);
117 |     abstract public void addKeyValueMetaData(KeyValue kv);
118 |     abstract public void setCreatedBy(String createdBy);
119 |   }
120 |
121 |   /**
122 |    * Simple default consumer that sets the fields
123 |    */
124 |   public static final class DefaultFileMetaDataConsumer extends FileMetaDataConsumer {
125 |     private final FileMetaData md;
126 |
127 |     public DefaultFileMetaDataConsumer(FileMetaData md) {
128 |       this.md = md;
129 |     }
130 |
131 |     @Override
132 |     public void setVersion(int version) {
133 |       md.setVersion(version);
134 |     }
135 |
136 |     @Override
137 |     public void setSchema(List<SchemaElement> schema) {
138 |       md.setSchema(schema);
139 |     }
140 |
141 |     @Override
142 |     public void setNumRows(long numRows) {
143 |       md.setNum_rows(numRows);
144 |     }
145 |
146 |     @Override
147 |     public void setCreatedBy(String createdBy) {
148 |       md.setCreated_by(createdBy);
149 |     }
150 |
151 |     @Override
152 |     public void addRowGroup(RowGroup rowGroup) {
153 |       md.addToRow_groups(rowGroup);
154 |     }
155 |
156 |     @Override
157 |     public void addKeyValueMetaData(KeyValue kv) {
158 |       md.addToKey_value_metadata(kv);
159 |     }
160 |   }
161 |
162 |   public static void readFileMetaData(InputStream from, FileMetaDataConsumer consumer) throws IOException {
163 |     readFileMetaData(from, consumer, false);
164 |   }
165 |
166 |   public static void readFileMetaData(InputStream from, final FileMetaDataConsumer consumer, boolean skipRowGroups) throws IOException {
167 |     try {
168 |       DelegatingFieldConsumer eventConsumer = fieldConsumer()
169 |           .onField(VERSION, new I32Consumer() {
170 |         @Override
171 |         public void consume(int value) {
172 |           consumer.setVersion(value);
173 |         }
174 |       }).onField(SCHEMA, listOf(SchemaElement.class, new Consumer<List<SchemaElement>>() {
175 |         @Override
176 |         public void consume(List<SchemaElement> schema) {
177 |           consumer.setSchema(schema);
178 |         }
179 |       })).onField(NUM_ROWS, new I64Consumer() {
180 |         @Override
181 |         public void consume(long value) {
182 |           consumer.setNumRows(value);
183 |         }
184 |       }).onField(KEY_VALUE_METADATA, listElementsOf(struct(KeyValue.class, new Consumer<KeyValue>() {
185 |         @Override
186 |         public void consume(KeyValue kv) {
187 |           consumer.addKeyValueMetaData(kv);
188 |         }
189 |       }))).onField(CREATED_BY, new StringConsumer() {
190 |         @Override
191 |         public void consume(String value) {
192 |           consumer.setCreatedBy(value);
193 |         }
194 |       });
195 |       if (!skipRowGroups) {
196 |         eventConsumer = eventConsumer.onField(ROW_GROUPS, listElementsOf(struct(RowGroup.class, new Consumer<RowGroup>() {
197 |           @Override
198 |           public void consume(RowGroup rowGroup) {
199 |             consumer.addRowGroup(rowGroup);
200 |           }
201 |         })));
202 |       }
203 |       new EventBasedThriftReader(protocol(from)).readStruct(eventConsumer);
204 |
205 |     } catch (TException e) {
206 |       throw new IOException("can not read FileMetaData: " + e.getMessage(), e);
207 |     }
208 |   }
209 |
210 |   private static TProtocol protocol(OutputStream to) {
211 |     return protocol(new TIOStreamTransport(to));
212 |   }
213 |
214 |   private static TProtocol protocol(InputStream from) {
215 |     return protocol(new TIOStreamTransport(from));
216 |   }
217 |
218 |
private static InterningProtocol protocol(TIOStreamTransport t) { 219 | return new InterningProtocol(new TCompactProtocol(t)); 220 | } 221 | 222 | private static > T read(InputStream from, T tbase) throws IOException { 223 | try { 224 | tbase.read(protocol(from)); 225 | return tbase; 226 | } catch (TException e) { 227 | throw new IOException("can not read " + tbase.getClass() + ": " + e.getMessage(), e); 228 | } 229 | } 230 | 231 | private static void write(TBase tbase, OutputStream to) throws IOException { 232 | try { 233 | tbase.write(protocol(to)); 234 | } catch (TException e) { 235 | throw new IOException("can not write " + tbase, e); 236 | } 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 21 | 4.0.0 22 | 23 | 24 | org.apache 25 | apache 26 | 16 27 | 28 | 29 | org.apache.parquet 30 | parquet-format 31 | 2.9.0-SNAPSHOT 32 | jar 33 | 34 | Apache Parquet Format 35 | https://parquet.apache.org/ 36 | Parquet is a columnar storage format that supports nested data. This provides all generated metadata code. 37 | 38 | 39 | scm:git:git@github.com:apache/parquet-format.git 40 | scm:git:git@github.com:apache/parquet-format.git 41 | scm:git:git@github.com:apache/parquet-format.git 42 | HEAD 43 | 44 | 45 | 46 | 48 | 49 | The Apache Software License, Version 2.0 50 | https://www.apache.org/licenses/LICENSE-2.0.txt 51 | 52 | 53 | 54 | 55 | JIRA 56 | https://issues.apache.org/jira/browse/PARQUET 57 | 58 | 59 | 60 | 61 | Dev Mailing List 62 | dev@parquet.apache.org 63 | dev-subscribe@parquet.apache.org 64 | dev-unsubscribe@parquet.apache.org 65 | 66 | 67 | Commits Mailing List 68 | commits@parquet.apache.org 69 | commits-subscribe@parquet.apache.org 70 | commits-unsubscribe@parquet.apache.org 71 | 72 | 73 | 74 | 75 | 76 | Julien Le Dem 77 | julien@twitter.com 78 | 79 | 80 | Nong Li 81 | nong@cloudera.com 82 | 83 | 84 | 85 | 86 | 1.8 87 | 1.8 88 | shaded.parquet 89 | thrift 90 | 0.12.0 91 | 0.10.0 92 | 93 | 94 | 95 | 96 | 97 | 98 | org.apache.thrift 99 | thrift-maven-plugin 100 | ${thrift-maven-plugin.version} 101 | 102 | src/main/thrift 103 | ${thrift.executable} 104 | 105 | 106 | 107 | thrift-sources 108 | generate-sources 109 | 110 | compile 111 | 112 | 113 | 114 | 115 | 116 | 117 | maven-assembly-plugin 118 | 119 | 120 | source-release-assembly 121 | none 122 | 123 | 124 | 125 | 126 | org.apache.maven.plugins 127 | maven-javadoc-plugin 128 | 129 | -Xdoclint:none 130 | 131 | 132 | 133 | 134 | org.apache.maven.plugins 135 | maven-compiler-plugin 136 | 137 | ${maven.compiler.source} 138 | ${maven.compiler.target} 139 | 140 | 141 | 142 | org.apache.maven.plugins 143 | maven-shade-plugin 144 | 2.2 145 | 146 | 147 | package 148 | 149 | shade 150 | 151 | 152 | 153 | 154 | org.apache.thrift:libthrift 155 | 156 | 157 | 158 | 159 | 160 | org.apache.thrift:libthrift 161 | 162 | **/*.java 163 | META-INF/LICENSE.txt 164 | META-INF/NOTICE.txt 165 | 166 | 167 | 168 | 169 | 170 | org.apache.thrift 171 | ${shade.prefix}.org.apache.thrift 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | org.apache.rat 180 | apache-rat-plugin 181 | 0.12 182 | 183 | 184 | test 185 | 186 | check 187 | 188 | 189 | 190 | 191 | true 192 | 193 | .github/PULL_REQUEST_TEMPLATE.md 194 | **/*.avro 195 | **/*.avsc 196 | **/*.avdl 197 | **/*.iml 198 | **/*.log 199 | **/*.md.vm 200 | **/.classpath 201 | **/.project 202 | **/.settings/** 203 | **/build/** 204 | **/target/** 205 | .git/** 206 | 
.idea/** 207 | */jdiff/*.xml 208 | licenses/** 209 | thrift-${thrift.version}/** 210 | thrift-${thrift.version}.tar.gz 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | org.slf4j 220 | slf4j-api 221 | 1.7.12 222 | 223 | 224 | org.apache.thrift 225 | libthrift 226 | ${thrift.version} 227 | 228 | 229 | javax.annotation 230 | javax.annotation-api 231 | 1.3.2 232 | 233 | 234 | junit 235 | junit 236 | 4.10 237 | test 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | !windows 246 | 247 | 248 | UnixClassOS 249 | 250 | 251 | 252 | org.codehaus.mojo 253 | exec-maven-plugin 254 | 1.2.1 255 | 256 | 257 | check-thrift-version 258 | generate-sources 259 | 260 | exec 261 | 262 | 263 | sh 264 | ${basedir} 265 | 266 | -c 267 | ${thrift.executable} -version | fgrep 'Thrift version ${thrift.version}' && exit 0; 268 | echo "================================================================================="; 269 | echo "========== [FATAL] Build is configured to require Thrift version ${thrift.version} =========="; 270 | echo -n "========== Currently installed: "; 271 | ${thrift.executable} -version; 272 | echo "================================================================================="; 273 | exit 1 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | -------------------------------------------------------------------------------- 205 | 206 | This product includes code from Apache Spark. 207 | 208 | * dev/merge_parquet_pr.py is based on Spark's dev/merge_spark_pr.py 209 | 210 | Copyright: 2014 The Apache Software Foundation. 
211 | Home page: https://spark.apache.org/ 212 | License: http://www.apache.org/licenses/LICENSE-2.0 213 | 214 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Parquet # 21 | 22 | ### Version 2.8.0 ### 23 | 24 | #### New Feature 25 | 26 | * [PARQUET-1622](https://issues.apache.org/jira/browse/PARQUET-1622) - Add BYTE\_STREAM\_SPLIT encoding 27 | 28 | #### Improvement 29 | 30 | * [PARQUET-1672](https://issues.apache.org/jira/browse/PARQUET-1672) - \[DOC\] Broken link to "How To Contribute" section in Parquet-MR project 31 | * [PARQUET-1708](https://issues.apache.org/jira/browse/PARQUET-1708) - Fix Thrift compiler warning 32 | 33 | #### Task 34 | 35 | * [PARQUET-1687](https://issues.apache.org/jira/browse/PARQUET-1687) - Update release process 36 | * [PARQUET-1714](https://issues.apache.org/jira/browse/PARQUET-1714) - Release parquet format 2.8.0 37 | 38 | ### Version 2.7.0 ### 39 | 40 | #### Sub-task 41 | 42 | * [PARQUET-1592](https://issues.apache.org/jira/browse/PARQUET-1592) - update hash naming of bloom filter 43 | * [PARQUET-1619](https://issues.apache.org/jira/browse/PARQUET-1619) - Merge crypto spec and structures to format master 44 | * [PARQUET-1625](https://issues.apache.org/jira/browse/PARQUET-1625) - Update parquet thrift to align with spec 45 | * [PARQUET-1630](https://issues.apache.org/jira/browse/PARQUET-1630) - Resolve Bloom filter spec concerns 46 | 47 | #### Bug 48 | 49 | * [PARQUET-1437](https://issues.apache.org/jira/browse/PARQUET-1437) - Misleading comment in parquet.thrift 50 | * [PARQUET-1554](https://issues.apache.org/jira/browse/PARQUET-1554) - Compilation error when upgrading Scrooge version 51 | * [PARQUET-1561](https://issues.apache.org/jira/browse/PARQUET-1561) - Inconsistencies in the Parquet Delta Encoding specification 52 | 53 | #### New Feature 54 | 55 | * [PARQUET-41](https://issues.apache.org/jira/browse/PARQUET-41) - Add bloom filters to parquet statistics 56 | * [PARQUET-1178](https://issues.apache.org/jira/browse/PARQUET-1178) - Parquet modular encryption 57 | 58 | #### Improvement 59 | 60 | * [PARQUET-1462](https://issues.apache.org/jira/browse/PARQUET-1462) - Allow specifying new development version in prepare-release.sh 61 | * [PARQUET-1487](https://issues.apache.org/jira/browse/PARQUET-1487) - Do not write original type for timezone-agnostic timestamps 62 | * [PARQUET-1499](https://issues.apache.org/jira/browse/PARQUET-1499) - [parquet-mr] Add Java 11 to Travis 63 | * [PARQUET-1539](https://issues.apache.org/jira/browse/PARQUET-1539) - Clarify CRC checksum in page header 64 | * [PARQUET-1579](https://issues.apache.org/jira/browse/PARQUET-1579) - Add Github PR template 65 | * [PARQUET-1588](https://issues.apache.org/jira/browse/PARQUET-1588) - Bump Apache Thrift to 0.12.0 in parquet-format 66 | * [PARQUET-1589](https://issues.apache.org/jira/browse/PARQUET-1589) - Bump Java to 8 67 | * [PARQUET-1590](https://issues.apache.org/jira/browse/PARQUET-1590) - [parquet-format] Add Java 11 to Travis 68 | * [PARQUET-1591](https://issues.apache.org/jira/browse/PARQUET-1591) - Remove @author tags from the source 69 | * [PARQUET-1609](https://issues.apache.org/jira/browse/PARQUET-1609) - support xxhash in bloom filter 70 | * [PARQUET-1610](https://issues.apache.org/jira/browse/PARQUET-1610) - Small spelling issues 71 | * [PARQUET-1617](https://issues.apache.org/jira/browse/PARQUET-1617) - Add more details to bloom 
filter spec 72 | 73 | #### Task 74 | 75 | * [PARQUET-1433](https://issues.apache.org/jira/browse/PARQUET-1433) - Parquet-format doesn't compile with Thrift 0.10.0 76 | * [PARQUET-1572](https://issues.apache.org/jira/browse/PARQUET-1572) - Clarify the definition of timestamp types 77 | * [PARQUET-1585](https://issues.apache.org/jira/browse/PARQUET-1585) - Update old external links in the code base 78 | * [PARQUET-1627](https://issues.apache.org/jira/browse/PARQUET-1627) - Update specification so that legacy timestamp logical types can be written for local semantics as well 79 | 80 | ### Version 2.6.0 ### 81 | 82 | #### Bug 83 | 84 | * [PARQUET-1266](https://issues.apache.org/jira/browse/PARQUET-1266) - LogicalTypes union in parquet-format doesn't include UUID 85 | 86 | #### Improvement 87 | 88 | * [PARQUET-1290](https://issues.apache.org/jira/browse/PARQUET-1290) - Clarify maximum run lengths for RLE encoding 89 | * [PARQUET-1387](https://issues.apache.org/jira/browse/PARQUET-1387) - Nanosecond precision time and timestamp - parquet-format 90 | * [PARQUET-1400](https://issues.apache.org/jira/browse/PARQUET-1400) - Deprecate parquet-mr related code in parquet-format 91 | 92 | #### Task 93 | 94 | * [PARQUET-1429](https://issues.apache.org/jira/browse/PARQUET-1429) - Turn off DocLint on parquet-format 95 | 96 | ### Version 2.5.0 ### 97 | 98 | #### Bug 99 | 100 | * [PARQUET-323](https://issues.apache.org/jira/browse/PARQUET-323) - INT96 should be marked as deprecated 101 | * [PARQUET-1064](https://issues.apache.org/jira/browse/PARQUET-1064) - Deprecate type-defined sort ordering for INTERVAL type 102 | * [PARQUET-1065](https://issues.apache.org/jira/browse/PARQUET-1065) - Deprecate type-defined sort ordering for INT96 type 103 | * [PARQUET-1145](https://issues.apache.org/jira/browse/PARQUET-1145) - Add license to .gitignore and .travis.yml 104 | * [PARQUET-1156](https://issues.apache.org/jira/browse/PARQUET-1156) - dev/merge\_parquet\_pr.py problems 105 | * [PARQUET-1236](https://issues.apache.org/jira/browse/PARQUET-1236) - Upgrade org.slf4j:slf4j-api:1.7.2 to 1.7.12 106 | * [PARQUET-1242](https://issues.apache.org/jira/browse/PARQUET-1242) - parquet.thrift refers to wrong releases for the new compressions 107 | * [PARQUET-1251](https://issues.apache.org/jira/browse/PARQUET-1251) - Clarify ambiguous min/max stats for FLOAT/DOUBLE 108 | * [PARQUET-1258](https://issues.apache.org/jira/browse/PARQUET-1258) - Update scm developer connection to github 109 | 110 | #### New Feature 111 | 112 | * [PARQUET-1201](https://issues.apache.org/jira/browse/PARQUET-1201) - Write column indexes 113 | 114 | #### Improvement 115 | 116 | * [PARQUET-1171](https://issues.apache.org/jira/browse/PARQUET-1171) - \[C++\] Clarify valid uses for RLE, BIT_PACKED encodings 117 | * [PARQUET-1197](https://issues.apache.org/jira/browse/PARQUET-1197) - Log rat failures 118 | 119 | #### Task 120 | 121 | * [PARQUET-1234](https://issues.apache.org/jira/browse/PARQUET-1234) - Release Parquet format 2.5.0 122 | 123 | ### Version 2.4.0 ### 124 | 125 | #### Bug 126 | 127 | * [PARQUET-255](https://issues.apache.org/jira/browse/PARQUET-255) - Typo in decimal type specification 128 | * [PARQUET-322](https://issues.apache.org/jira/browse/PARQUET-322) - Document ENUM as a logical type 129 | * [PARQUET-412](https://issues.apache.org/jira/browse/PARQUET-412) - Format: Do not shade slf4j-api 130 | * [PARQUET-419](https://issues.apache.org/jira/browse/PARQUET-419) - Update dev script in parquet-cpp to remove incubator. 
131 | * [PARQUET-655](https://issues.apache.org/jira/browse/PARQUET-655) - The LogicalTypes.md link in README.md points to the old Parquet GitHub repository 132 | * [PARQUET-1031](https://issues.apache.org/jira/browse/PARQUET-1031) - Fix spelling errors, whitespace, GitHub urls 133 | * [PARQUET-1032](https://issues.apache.org/jira/browse/PARQUET-1032) - Change link in Encodings.md for variable length encoding 134 | * [PARQUET-1050](https://issues.apache.org/jira/browse/PARQUET-1050) - The comment of Parquet Format Thrift definition file error 135 | * [PARQUET-1076](https://issues.apache.org/jira/browse/PARQUET-1076) - [Format] Switch to long key ids in KEYs file 136 | * [PARQUET-1091](https://issues.apache.org/jira/browse/PARQUET-1091) - Wrong and broken links in README 137 | * [PARQUET-1102](https://issues.apache.org/jira/browse/PARQUET-1102) - Travis CI builds are failing for parquet-format PRs 138 | * [PARQUET-1134](https://issues.apache.org/jira/browse/PARQUET-1134) - Release Parquet format 2.4.0 139 | * [PARQUET-1136](https://issues.apache.org/jira/browse/PARQUET-1136) - Makefile is broken 140 | 141 | #### Improvement 142 | 143 | * [PARQUET-371](https://issues.apache.org/jira/browse/PARQUET-371) - Bumps Thrift version to 0.9.3 144 | * [PARQUET-407](https://issues.apache.org/jira/browse/PARQUET-407) - Incorrect delta-encoding example 145 | * [PARQUET-428](https://issues.apache.org/jira/browse/PARQUET-428) - Support INT96 and FIXED_LEN_BYTE_ARRAY types 146 | * [PARQUET-601](https://issues.apache.org/jira/browse/PARQUET-601) - Add support in Parquet to configure the encoding used by ValueWriters 147 | * [PARQUET-609](https://issues.apache.org/jira/browse/PARQUET-609) - Add Brotli compression to Parquet format 148 | * [PARQUET-757](https://issues.apache.org/jira/browse/PARQUET-757) - Add NULL type to Bring Parquet logical types to par with Arrow 149 | * [PARQUET-804](https://issues.apache.org/jira/browse/PARQUET-804) - parquet-format README.md still links to the old Google group 150 | * [PARQUET-922](https://issues.apache.org/jira/browse/PARQUET-922) - Add index pages to the format to support efficient page skipping 151 | * [PARQUET-1049](https://issues.apache.org/jira/browse/PARQUET-1049) - Make thrift version a property in pom.xml 152 | 153 | #### Task 154 | 155 | * [PARQUET-450](https://issues.apache.org/jira/browse/PARQUET-450) - Small typos/issues in parquet-format documentation 156 | * [PARQUET-667](https://issues.apache.org/jira/browse/PARQUET-667) - Update committers lists to point to apache website 157 | * [PARQUET-1124](https://issues.apache.org/jira/browse/PARQUET-1124) - Add new compression codecs to the Parquet spec 158 | * [PARQUET-1125](https://issues.apache.org/jira/browse/PARQUET-1125) - Add UUID logical type 159 | 160 | ### Version 2.2.0 ### 161 | 162 | * [PARQUET-23](https://issues.apache.org/jira/browse/PARQUET-23): Rename packages and maven coordinates to org.apache 163 | * [PARQUET-119](https://issues.apache.org/jira/browse/PARQUET-119): Add encoding stats to ColumnMetaData 164 | * [PARQUET-79](https://issues.apache.org/jira/browse/PARQUET-79): Streaming thrift API 165 | * [PARQUET-12](https://issues.apache.org/jira/browse/PARQUET-12): New logical types 166 | 167 | ### Version 2.1.0 ### 168 | * ISSUE [84](https://github.com/Parquet/parquet-format/pull/84): Add metadata in the schema for storing decimals. 
169 | * ISSUE [89](https://github.com/Parquet/parquet-format/pull/89): Added statistics to the data page header 170 | * ISSUE [86](https://github.com/Parquet/parquet-format/pull/86): Fix minor formatting, correct some wording under the "Error recovery" se... 171 | * ISSUE [82](https://github.com/Parquet/parquet-format/pull/82): exclude thrift source from jar 172 | * ISSUE [80](https://github.com/Parquet/parquet-format/pull/80): Upgrade maven-shade-plugin to 2.1 to compile with mvn 3.1.1 173 | 174 | ### Version 2.0.0 ### 175 | * ISSUE [79](https://github.com/Parquet/parquet-format/pull/79): Reorganize encodings and add details 176 | * ISSUE [78](https://github.com/Parquet/parquet-format/pull/78): Added sorted flag to dictionary page headers. 177 | * ISSUE [77](https://github.com/Parquet/parquet-format/pull/77): fix plugin versions 178 | * ISSUE [75](https://github.com/Parquet/parquet-format/pull/75): refactor dictionary encoding 179 | * ISSUE [64](https://github.com/Parquet/parquet-format/pull/64): new data page and stats 180 | * ISSUE [74](https://github.com/Parquet/parquet-format/pull/74): deprecate and remove group_var_int encoding 181 | * ISSUE [76](https://github.com/Parquet/parquet-format/pull/76): add mention of boolean on RLE 182 | * ISSUE [73](https://github.com/Parquet/parquet-format/pull/73): reformat encodings 183 | * ISSUE [71](https://github.com/Parquet/parquet-format/pull/71): refactor documentation for 2.0 encodings 184 | * ISSUE [66](https://github.com/Parquet/parquet-format/pull/66): Block strings 185 | * ISSUE [67](https://github.com/Parquet/parquet-format/pull/67): Add ENUM ConvertedType 186 | * ISSUE [58](https://github.com/Parquet/parquet-format/pull/58): Correct unterminated comment for SortingColumn. 187 | * ISSUE [51](https://github.com/Parquet/parquet-format/pull/51): Add metadata to specify row groups are sorted. 188 | 189 | ### Version 1.0.0 ### 190 | * ISSUE [46](https://github.com/Parquet/parquet-format/pull/46): Update readme to include 4 byte length in rle columns 191 | * ISSUE [47](https://github.com/Parquet/parquet-format/pull/47): fixed typo in readme.md 192 | * ISSUE [45](https://github.com/Parquet/parquet-format/pull/45): Typo in describing preferred row group size 193 | * ISSUE [43](https://github.com/Parquet/parquet-format/pull/43): add dictionary encoding details 194 | * ISSUE [41](https://github.com/Parquet/parquet-format/pull/41): Update readme with details about RLE encoding 195 | * ISSUE [39](https://github.com/Parquet/parquet-format/pull/39): Added created_by optional file metadata. 
196 | * ISSUE [40](https://github.com/Parquet/parquet-format/pull/40): add details about the page size fields 197 | * ISSUE [35](https://github.com/Parquet/parquet-format/pull/35): this embeds and renames the thrift dependency in the jar, allowing people to use a different version of thrift in parallel 198 | * ISSUE [36](https://github.com/Parquet/parquet-format/pull/36): adding the encoding to the dictionary page 199 | * ISSUE [34](https://github.com/Parquet/parquet-format/pull/34): Corrected typo 200 | * ISSUE [32](https://github.com/Parquet/parquet-format/pull/32): Add layout diagram to README and fix typo 201 | * ISSUE [31](https://github.com/Parquet/parquet-format/pull/31): Restore encoding changes 202 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | -------------------------------------------------------------------------------- 205 | 206 | This product depends on SLF4J and includes SLF4J in this binary artifact. SLF4J 207 | is a simple logging facade for Java. 208 | 209 | Copyright: 2004-2013 QOS.ch. 210 | Home page: http://www.slf4j.org/ 211 | License: http://slf4j.org/license.html (MIT license) 212 | 213 | The following is the SLF4J license (MIT): 214 | 215 | Copyright (c) 2004-2013 QOS.ch 216 | All rights reserved. 
217 | 218 | Permission is hereby granted, free of charge, to any person obtaining 219 | a copy of this software and associated documentation files (the 220 | "Software"), to deal in the Software without restriction, including 221 | without limitation the rights to use, copy, modify, merge, publish, 222 | distribute, sublicense, and/or sell copies of the Software, and to 223 | permit persons to whom the Software is furnished to do so, subject to 224 | the following conditions: 225 | 226 | The above copyright notice and this permission notice shall be 227 | included in all copies or substantial portions of the Software. 228 | 229 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 230 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 231 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 232 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 233 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 234 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 235 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 236 | 237 | -------------------------------------------------------------------------------- 238 | 239 | This product depends on Apache Thrift and includes it in this binary artifact. 240 | 241 | Copyright: 2006-2010 The Apache Software Foundation. 242 | Home page: https://thrift.apache.org/ 243 | License: http://www.apache.org/licenses/LICENSE-2.0 244 | 245 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Parquet [![Build Status](https://travis-ci.org/apache/parquet-format.png?branch=master)](http://travis-ci.org/apache/parquet-format) 21 | 22 | Parquet is a columnar storage format that supports nested data. 23 | 24 | Parquet metadata is encoded using Apache Thrift. 25 | 26 | The `Parquet-format` project contains all Thrift definitions that are necessary to create readers 27 | and writers for Parquet files. 28 | 29 | ## Motivation 30 | 31 | We created Parquet to make the advantages of compressed, efficient columnar data representation available to any project in the Hadoop ecosystem. 32 | 33 | Parquet is built from the ground up with complex nested data structures in mind, and uses the [record shredding and assembly algorithm](https://github.com/julienledem/redelm/wiki/The-striping-and-assembly-algorithms-from-the-Dremel-paper) described in the Dremel paper. We believe this approach is superior to simple flattening of nested name spaces. 34 | 35 | Parquet is built to support very efficient compression and encoding schemes. Multiple projects have demonstrated the performance impact of applying the right compression and encoding scheme to the data. Parquet allows compression schemes to be specified on a per-column level, and is future-proofed to allow adding more encodings as they are invented and implemented. 36 | 37 | Parquet is built to be used by anyone. The Hadoop ecosystem is rich with data processing frameworks, and we are not interested in playing favorites. We believe that an efficient, well-implemented columnar storage substrate should be useful to all frameworks without the cost of extensive and difficult to set up dependencies. 38 | 39 | ## Modules 40 | 41 | The `parquet-format` project contains format specifications and Thrift definitions of metadata required to properly read Parquet files. 
42 | 
43 | The `parquet-mr` project contains multiple sub-modules, which implement the core components of reading and writing a nested, column-oriented data stream, map this core onto the parquet format, and provide Hadoop Input/Output Formats, Pig loaders, and other Java-based utilities for interacting with Parquet. 
44 | 
45 | The `parquet-compatibility` project contains compatibility tests that can be used to verify that implementations in different languages can read and write each other's files. 
46 | 
47 | ## Building 
48 | 
49 | Java resources can be built using `mvn package`. The current stable version should always be available from Maven Central. 
50 | 
51 | C++ thrift resources can be generated via make. 
52 | 
53 | Thrift can also be code-generated into any other thrift-supported language. 
54 | 
55 | ## Glossary 
56 | - Block (HDFS block): This means a block in HDFS and the meaning is 
57 | unchanged for describing this file format. The file format is 
58 | designed to work well on top of HDFS. 
59 | 
60 | - File: An HDFS file that must include the metadata for the file. 
61 | It does not need to actually contain the data. 
62 | 
63 | - Row group: A logical horizontal partitioning of the data into rows. 
64 | There is no physical structure that is guaranteed for a row group. 
65 | A row group consists of a column chunk for each column in the dataset. 
66 | 
67 | - Column chunk: A chunk of the data for a particular column. Column chunks live 
68 | in a particular row group and are guaranteed to be contiguous in the file. 
69 | 
70 | - Page: Column chunks are divided up into pages. A page is conceptually 
71 | an indivisible unit (in terms of compression and encoding). There can 
72 | be multiple page types which are interleaved in a column chunk. 
73 | 
74 | Hierarchically, a file consists of one or more row groups. A row group 
75 | contains exactly one column chunk per column. Column chunks contain one or 
76 | more pages. 
77 | 
78 | ## Unit of parallelization 
79 | - MapReduce - File/Row Group 
80 | - IO - Column chunk 
81 | - Encoding/Compression - Page 
82 | 
83 | ## File format 
84 | This file and the [thrift definition](src/main/thrift/parquet.thrift) should be read together to understand the format. 
85 | 
86 | 4-byte magic number "PAR1" 
87 | &lt;Column 1 Chunk 1 + Column Metadata&gt; 
88 | &lt;Column 2 Chunk 1 + Column Metadata&gt; 
89 | ... 
90 | &lt;Column N Chunk 1 + Column Metadata&gt; 
91 | &lt;Column 1 Chunk 2 + Column Metadata&gt; 
92 | &lt;Column 2 Chunk 2 + Column Metadata&gt; 
93 | ... 
94 | &lt;Column N Chunk 2 + Column Metadata&gt; 
95 | ... 
96 | &lt;Column 1 Chunk M + Column Metadata&gt; 
97 | &lt;Column 2 Chunk M + Column Metadata&gt; 
98 | ... 
99 | &lt;Column N Chunk M + Column Metadata&gt; 
100 | File Metadata 
101 | 4-byte length in bytes of file metadata (little endian) 
102 | 4-byte magic number "PAR1" 
103 | 
104 | In the above example, there are N columns in this table, split into M row 
105 | groups. The file metadata contains the locations of all the column metadata 
106 | start locations. More details on what is contained in the metadata can be found 
107 | in the thrift definition. 
108 | 
109 | Metadata is written after the data to allow for single pass writing. 
110 | 
111 | Readers are expected to first read the file metadata to find all the column 
112 | chunks they are interested in. The column chunks should then be read sequentially. 
113 | 
114 | ![File Layout](https://raw.github.com/apache/parquet-format/master/doc/images/FileLayout.gif) 
115 | 
116 | ## Metadata 
117 | There are three types of metadata: file metadata, column (chunk) metadata and page 
118 | header metadata. All thrift structures are serialized using the TCompactProtocol. 
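Putting the trailer layout and the Thrift-serialized footer together, here is a minimal Python sketch (not part of this repository) of how a reader locates the file metadata; the helper name is illustrative only, and decoding the returned bytes still requires the Thrift-generated `FileMetaData` class:

```python
# A minimal sketch of locating the Parquet footer, assuming a local file path.
import struct

def read_footer_bytes(path):
    """Return the raw serialized file metadata (TCompactProtocol bytes)."""
    with open(path, "rb") as f:
        f.seek(-8, 2)                       # last 8 bytes: footer length + magic
        footer_len_bytes = f.read(4)        # 4-byte length, little endian
        if f.read(4) != b"PAR1":
            raise ValueError("not a Parquet file (bad trailing magic)")
        footer_len = struct.unpack("<I", footer_len_bytes)[0]
        f.seek(-(8 + footer_len), 2)        # metadata sits just before the length
        return f.read(footer_len)           # decode with the Thrift FileMetaData stub
```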
119 | 
120 | ![Metadata diagram](https://github.com/apache/parquet-format/raw/master/doc/images/FileFormat.gif) 
121 | 
122 | ## Types 
123 | The types supported by the file format are intended to be as minimal as possible, 
124 | with a focus on how the types affect on-disk storage. For example, 16-bit ints 
125 | are not explicitly supported in the storage format since they are covered by 
126 | 32-bit ints with an efficient encoding. This reduces the complexity of implementing 
127 | readers and writers for the format. The types are: 
128 | - BOOLEAN: 1 bit boolean 
129 | - INT32: 32 bit signed ints 
130 | - INT64: 64 bit signed ints 
131 | - INT96: 96 bit signed ints 
132 | - FLOAT: IEEE 32-bit floating point values 
133 | - DOUBLE: IEEE 64-bit floating point values 
134 | - BYTE_ARRAY: arbitrarily long byte arrays. 
135 | 
136 | ### Logical Types 
137 | Logical types are used to extend the types that parquet can be used to store, 
138 | by specifying how the primitive types should be interpreted. This keeps the set 
139 | of primitive types to a minimum and reuses parquet's efficient encodings. For 
140 | example, strings are stored as byte arrays (binary) with a UTF8 annotation. 
141 | These annotations define how to further decode and interpret the data. 
142 | Annotations are stored as `ConvertedType` fields in the file metadata and are 
143 | documented in 
144 | [LogicalTypes.md][logical-types]. 
145 | 
146 | [logical-types]: LogicalTypes.md 
147 | 
148 | ## Nested Encoding 
149 | To encode nested columns, Parquet uses the Dremel encoding with definition and 
150 | repetition levels. Definition levels specify how many optional fields in the 
151 | path for the column are defined. Repetition levels specify at which repeated field 
152 | in the path the value is repeated. The max definition and repetition levels can 
153 | be computed from the schema (i.e. how much nesting there is). This defines the 
154 | maximum number of bits required to store the levels (levels are defined for all 
155 | values in the column). 
156 | 
157 | Two encodings for the levels are supported: BIT_PACKED and RLE. Only RLE is now used as it supersedes BIT_PACKED. 
158 | 
159 | ## Nulls 
160 | Nullity is encoded in the definition levels (which are run-length encoded). NULL values 
161 | are not encoded in the data. For example, in a non-nested schema, a column with 1000 NULLs 
162 | would be encoded with run-length encoding (0, 1000 times) for the definition levels and 
163 | nothing else. 
164 | 
165 | ## Data Pages 
166 | For data pages, the 3 pieces of information are encoded back to back, after the page 
167 | header. 
168 | In order we have: 
169 | 1. repetition levels data 
170 | 1. definition levels data 
171 | 1. encoded values 
172 | 
173 | The value of `uncompressed_page_size` specified in the header is for all the 3 pieces combined. 
174 | 
175 | The encoded values for the data page are always required. The definition and repetition levels 
176 | are optional, based on the schema definition. If the column is not nested (i.e. 
177 | the path to the column has length 1), we do not encode the repetition levels (they would 
178 | always have the value 0). For data that is required, the definition levels are 
179 | skipped (if encoded, they will always have the value of the max definition level). 
180 | 
181 | For example, in the case where the column is non-nested and required, the data in the 
182 | page is only the encoded values. 
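To illustrate how nulls reduce to run-length-friendly definition levels with no value data, here is a toy Python sketch; the helper names are made up for this example and are not part of the format:

```python
# A toy sketch for a flat optional column (max definition level = 1).
def definition_levels(values):
    # NULL -> 0 (field undefined), non-NULL -> 1 (the max definition level)
    return [0 if v is None else 1 for v in values]

def rle_runs(levels):
    # Group consecutive equal levels into (level, run-length) pairs --
    # the shape the RLE/bit-packing hybrid encoding exploits.
    runs, prev, count = [], None, 0
    for lv in levels:
        if lv == prev:
            count += 1
        else:
            if prev is not None:
                runs.append((prev, count))
            prev, count = lv, 1
    if prev is not None:
        runs.append((prev, count))
    return runs

# 1000 NULLs collapse to a single run and no value data at all:
print(rle_runs(definition_levels([None] * 1000)))   # [(0, 1000)]
```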
183 | 
184 | The supported encodings are described in [Encodings.md](https://github.com/apache/parquet-format/blob/master/Encodings.md) 
185 | 
186 | ## Column chunks 
187 | Column chunks are composed of pages written back to back. The pages share a common 
188 | header and readers can skip over pages they are not interested in. The data for the 
189 | page follows the header and can be compressed and/or encoded. The compression and 
190 | encoding is specified in the page metadata. 
191 | 
192 | Additionally, files can contain an optional column index to allow readers to 
193 | skip pages more efficiently. See [PageIndex.md](PageIndex.md) for details and 
194 | the reasoning behind adding these to the format. 
195 | 
196 | ## Checksumming 
197 | Data pages can be individually checksummed. This allows disabling of checksums at the 
198 | HDFS file level, to better support single row lookups. Data page checksums are calculated 
199 | using the standard CRC32 algorithm on the compressed data of a page (not including the 
200 | page header itself). 
201 | 
202 | ## Error recovery 
203 | If the file metadata is corrupt, the file is lost. If the column metadata is corrupt, 
204 | that column chunk is lost (but column chunks for this column in other row groups are 
205 | okay). If a page header is corrupt, the remaining pages in that chunk are lost. If 
206 | the data within a page is corrupt, that page is lost. The file will be more 
207 | resilient to corruption with smaller row groups. 
208 | 
209 | Potential extension: With smaller row groups, the biggest issue is placing the file 
210 | metadata at the end. If an error happens while writing the file metadata, all the 
211 | data written will be unreadable. This can be fixed by writing the file metadata 
212 | every Nth row group. 
213 | Each file metadata would be cumulative and include all the row groups written so 
214 | far. Combining this with the strategy used for RC or Avro files using sync markers, 
215 | a reader could recover partially written files. 
216 | 
217 | ## Separating metadata and column data 
218 | The format is explicitly designed to separate the metadata from the data. This 
219 | allows splitting columns into multiple files, as well as having a single metadata 
220 | file reference multiple parquet files. 
221 | 
222 | ## Configurations 
223 | - Row group size: Larger row groups allow for larger column chunks which makes it 
224 | possible to do larger sequential IO. Larger groups also require more buffering in 
225 | the write path (or a two pass write). We recommend large row groups (512MB - 1GB). 
226 | Since an entire row group might need to be read, we want it to completely fit on 
227 | one HDFS block. Therefore, HDFS block sizes should also be set to be larger. An 
228 | optimized read setup would be: 1GB row groups, 1GB HDFS block size, 1 HDFS block 
229 | per HDFS file. 
230 | - Data page size: Data pages should be considered indivisible so smaller data pages 
231 | allow for more fine grained reading (e.g. single row lookup). Larger page sizes 
232 | incur less space overhead (fewer page headers) and potentially less parsing overhead 
233 | (processing headers). Note: for sequential scans, it is not expected to read a page 
234 | at a time; this is not the IO chunk. We recommend 8KB for page sizes. 
235 | 
236 | ## Extensibility 
237 | There are many places in the format for compatible extensions: 
238 | - File Version: The file metadata contains a version. 
239 | - Encodings: Encodings are specified by enum and more can be added in the future. 
240 | - Page types: Additional page types can be added and safely skipped. 
241 | 
242 | ## Contributing 
243 | Comment on the issue and/or contact [the parquet-dev mailing list](http://mail-archives.apache.org/mod_mbox/parquet-dev/) with your questions and ideas. 
244 | Changes to this core format definition are proposed and discussed in depth on the mailing list. You may also be interested in contributing to the Parquet-MR subproject, which contains all the Java-side implementation and APIs. See the "How To Contribute" section of the [Parquet-MR project](https://github.com/apache/parquet-mr#how-to-contribute). 
245 | 
246 | ## Code of Conduct 
247 | 
248 | We hold ourselves and the Parquet developer community to a code of conduct as described by [Twitter OSS](https://engineering.twitter.com/opensource). 
249 | 
250 | ## License 
251 | Copyright 2013 Twitter, Cloudera and other contributors. 
252 | 
253 | Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 
254 | -------------------------------------------------------------------------------- 
/Encodings.md: 
-------------------------------------------------------------------------------- 
1 | 
19 | 
20 | Parquet encoding definitions 
21 | ==== 
22 | 
23 | This file contains the specification of all supported encodings. 
24 | 
25 | ### Plain: (PLAIN = 0) 
26 | 
27 | Supported Types: all 
28 | 
29 | This is the plain encoding that must be supported for all types. It is 
30 | intended to be the simplest encoding. Values are encoded back to back. 
31 | 
32 | The plain encoding is used whenever a more efficient encoding cannot be used. It 
33 | stores the data in the following format: 
34 | - BOOLEAN: [Bit Packed](#RLE), LSB first 
35 | - INT32: 4 bytes little endian 
36 | - INT64: 8 bytes little endian 
37 | - INT96: 12 bytes little endian (deprecated) 
38 | - FLOAT: 4 bytes IEEE little endian 
39 | - DOUBLE: 8 bytes IEEE little endian 
40 | - BYTE_ARRAY: length in 4 bytes little endian followed by the bytes contained in the array 
41 | - FIXED_LEN_BYTE_ARRAY: the bytes contained in the array 
42 | 
43 | For native types, this outputs the data as little endian. Floating 
44 | point types are encoded in IEEE. 
45 | 
46 | For the byte array type, it encodes the length as a 4 byte little 
47 | endian, followed by the bytes. 
48 | 
49 | ### Dictionary Encoding (PLAIN_DICTIONARY = 2 and RLE_DICTIONARY = 8) 
50 | The dictionary encoding builds a dictionary of values encountered in a given column. The 
51 | dictionary will be stored in a dictionary page per column chunk. The values are stored as integers 
52 | using the [RLE/Bit-Packing Hybrid](#RLE) encoding. If the dictionary grows too big, whether in size 
53 | or number of distinct values, the encoding will fall back to the plain encoding. The dictionary page is 
54 | written first, before the data pages of the column chunk. 
55 | 
56 | Dictionary page format: the entries in the dictionary - in dictionary order - using the [plain](#PLAIN) encoding. 
57 | 
58 | Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32), 
59 | followed by the values encoded using RLE/Bit packed described above (with the given bit width). 
60 | 
61 | Using the PLAIN_DICTIONARY enum value is deprecated in the Parquet 2.0 specification. Prefer using RLE_DICTIONARY 
62 | in a data page and PLAIN in a dictionary page for Parquet 2.0+ files. 
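As a rough illustration of the scheme described above, the following Python sketch — illustrative only, with the size-based fallback logic omitted — builds the dictionary entries and the entry ids that would go to the dictionary page and data pages respectively:

```python
# A minimal sketch of dictionary encoding; function and variable names are
# made up for this example, not taken from any Parquet implementation.
def dictionary_encode(values):
    dictionary, indices, seen = [], [], {}
    for v in values:
        if v not in seen:
            seen[v] = len(dictionary)
            dictionary.append(v)      # dictionary page: entries in id order
        indices.append(seen[v])       # data page: entry ids, later RLE/bit-packed
    # bit width of the entry ids, stored as 1 byte in the data page
    bit_width = max(1, (len(dictionary) - 1).bit_length())
    return dictionary, bit_width, indices

dictionary, bit_width, ids = dictionary_encode(["a", "b", "a", "a", "c"])
# dictionary == ["a", "b", "c"], bit_width == 2, ids == [0, 1, 0, 0, 2]
```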
63 | 
64 | ### Run Length Encoding / Bit-Packing Hybrid (RLE = 3) 
65 | 
66 | This encoding uses a combination of bit-packing and run length encoding to more efficiently store repeated values. 
67 | 
68 | The grammar for this encoding looks like this, given a fixed bit-width known in advance: 
69 | ``` 
70 | rle-bit-packed-hybrid: <length> <encoded-data> 
71 | length := length of the <encoded-data> in bytes stored as 4 bytes little endian (unsigned int32) 
72 | encoded-data := <run>* 
73 | run := <bit-packed-run> | <rle-run> 
74 | bit-packed-run := <bit-packed-header> <bit-packed-values> 
75 | bit-packed-header := varint-encode(<bit-pack-scaled-run-len> << 1 | 1) 
76 | // we always bit-pack a multiple of 8 values at a time, so we only store the number of values / 8 
77 | bit-pack-scaled-run-len := (bit-packed-run-len) / 8 
78 | bit-packed-run-len := *see 3 below* 
79 | bit-packed-values := *see 1 below* 
80 | rle-run := <rle-header> <repeated-value> 
81 | rle-header := varint-encode( (rle-run-len) << 1) 
82 | rle-run-len := *see 3 below* 
83 | repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width) 
84 | ``` 
85 | 
86 | 1. The bit-packing here is done in a different order than the one in the [deprecated bit-packing](#BITPACKED) encoding. 
87 | The values are packed from the least significant bit of each byte to the most significant bit, 
88 | though the order of the bits in each value remains in the usual order of most significant to least 
89 | significant. For example, to pack the same values as the example in the deprecated encoding above: 
90 | 
91 | The numbers 1 through 7 using bit width 3: 
92 | ``` 
93 | dec value: 0 1 2 3 4 5 6 7 
94 | bit value: 000 001 010 011 100 101 110 111 
95 | bit label: ABC DEF GHI JKL MNO PQR STU VWX 
96 | ``` 
97 | 
98 | would be encoded like this where spaces mark byte boundaries (3 bytes): 
99 | ``` 
100 | bit value: 10001000 11000110 11111010 
101 | bit label: HIDEFABC RMNOJKLG VWXSTUPQ 
102 | ``` 
103 | 
104 | The reason for this packing order is to have fewer word-boundaries on little-endian hardware 
105 | when deserializing more than one byte at a time. This is because 4 bytes can be read into a 
106 | 32 bit register (or 8 bytes into a 64 bit register) and values can be unpacked just by 
107 | shifting and ORing with a mask. (to make this optimization work on a big-endian machine, 
108 | you would have to use the ordering used in the [deprecated bit-packing](#BITPACKED) encoding) 
109 | 
110 | 2. varint-encode() is ULEB-128 encoding, see https://en.wikipedia.org/wiki/LEB128 
111 | 
112 | 3. bit-packed-run-len and rle-run-len must be in the range \[1, 2^31 - 1\]. 
113 | This means that a Parquet implementation can always store the run length in a signed 
114 | 32-bit integer. This length restriction was not part of the Parquet 2.5.0 and earlier 
115 | specifications, but longer runs were not readable by the most common Parquet 
116 | implementations so, in practice, were not safe for Parquet writers to emit. 
117 | 
118 | 
119 | Note that the RLE encoding method is only supported for the following types of 
120 | data: 
121 | 
122 | * Repetition and definition levels 
123 | * Dictionary indices 
124 | * Boolean values in data pages, as an alternative to PLAIN encoding 
125 | 
126 | ### Bit-packed (Deprecated) (BIT_PACKED = 4) 
127 | 
128 | This is a bit-packed only encoding, which is deprecated and will be replaced by the [RLE/bit-packing](#RLE) hybrid encoding. 
129 | Each value is encoded back to back using a fixed width. 
130 | There is no padding between values (except for the last byte) which is padded with 0s. 
131 | For example, if the max repetition level was 3 (2 bits) and the max definition level was 3 
132 | (2 bits), to encode 30 values, we would have 30 * 2 = 60 bits = 8 bytes. 
133 | 
134 | This implementation is deprecated because the [RLE/bit-packing](#RLE) hybrid is a superset of this implementation. 
135 | For compatibility reasons, this implementation packs values from the most significant bit to the least significant bit, 
136 | which is not the same as the [RLE/bit-packing](#RLE) hybrid. 
137 | 
138 | For example, the numbers 1 through 7 using bit width 3: 
139 | ``` 
140 | dec value: 0 1 2 3 4 5 6 7 
141 | bit value: 000 001 010 011 100 101 110 111 
142 | bit label: ABC DEF GHI JKL MNO PQR STU VWX 
143 | ``` 
144 | would be encoded like this where spaces mark byte boundaries (3 bytes): 
145 | ``` 
146 | bit value: 00000101 00111001 01110111 
147 | bit label: ABCDEFGH IJKLMNOP QRSTUVWX 
148 | ``` 
149 | 
150 | Note that the BIT_PACKED encoding method is only supported for encoding 
151 | repetition and definition levels. 
152 | 
153 | ### Delta Encoding (DELTA_BINARY_PACKED = 5) 
154 | Supported Types: INT32, INT64 
155 | 
156 | This encoding is adapted from the Binary packing described in ["Decoding billions of integers per second through vectorization"](http://arxiv.org/pdf/1209.2137v5.pdf) by D. Lemire and L. Boytsov. 
157 | 
158 | In delta encoding we make use of variable length integers for storing various numbers (not the deltas themselves). For unsigned values, we use ULEB128, which is the unsigned version of LEB128 (https://en.wikipedia.org/wiki/LEB128#Unsigned_LEB128). For signed values, we use zigzag encoding (https://developers.google.com/protocol-buffers/docs/encoding#signed-integers) to map negative values to positive ones and apply ULEB128 on the result. 
159 | 
160 | Delta encoding consists of a header followed by blocks of delta encoded values binary packed. Each block is made of miniblocks, each of them binary packed with its own bit width. 
161 | 
162 | The header is defined as follows: 
163 | ``` 
164 | <block size in values> <number of miniblocks in a block> <total value count> <first value> 
165 | ``` 
166 | * the block size is a multiple of 128; it is stored as a ULEB128 int 
167 | * the miniblock count per block is a divisor of the block size such that their quotient, the number of values in a miniblock, is a multiple of 32; it is stored as a ULEB128 int 
168 | * the total value count is stored as a ULEB128 int 
169 | * the first value is stored as a zigzag ULEB128 int 
170 | 
171 | Each block contains 
172 | ``` 
173 | <min delta> <list of bitwidths of miniblocks> <miniblocks> 
174 | ``` 
175 | * the min delta is a zigzag ULEB128 int (we compute a minimum as we need positive integers for bit packing) 
176 | * the bitwidth of each miniblock is stored as a byte 
177 | * each miniblock is a list of bit packed ints according to the bit width stored at the beginning of the block 
178 | 
179 | To encode a block, we will: 
180 | 
181 | 1. Compute the differences between consecutive elements. For the first element in the block, use the last element in the previous block or, in the case of the first block, use the first value of the whole sequence, stored in the header. 
182 | 
183 | 2. Compute the frame of reference (the minimum of the deltas in the block). Subtract this min delta from all deltas in the block. This guarantees that all values are non-negative. 
184 | 
185 | 3. Encode the frame of reference (min delta) as a zigzag ULEB128 int followed by the bit widths of the miniblocks and the delta values (minus the min delta) bit packed per miniblock (a short sketch of these steps follows). 
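To make steps 1-3 concrete, here is a small Python sketch for a single block with one miniblock; the actual bit packing and the ULEB128/zigzag serialization are elided, and the function name is made up for this example:

```python
# A sketch of steps 1-3 for one block; prev_last is the last element of the
# previous block (or, for the first block, the first value from the header).
def delta_block(values, prev_last):
    deltas = [v - p for p, v in zip([prev_last] + values, values)]  # step 1
    min_delta = min(deltas)                                         # step 2
    adjusted = [d - min_delta for d in deltas]                      # non-negative
    bit_width = max(d.bit_length() for d in adjusted)               # step 3
    return min_delta, bit_width, adjusted

# Mirrors Example 2 below: deltas -2,-2,-2,1,1,1,1 -> min delta -2, bit width 2
print(delta_block([5, 3, 1, 2, 3, 4, 5], prev_last=7))
# (-2, 2, [0, 0, 0, 3, 3, 3, 3])
```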
186 | 
187 | Having multiple blocks allows us to adapt to changes in the data by changing the frame of reference (the min delta) which can result in smaller values after the subtraction which, again, means we can store them with a lower bit width. 
188 | 
189 | If there are not enough values to fill the last miniblock, we pad the miniblock so that its length is always the number of values in a full miniblock multiplied by the bit width. The values of the padding bits should be zero, but readers must accept paddings consisting of arbitrary bits as well. 
190 | 
191 | If, in the last block, fewer than `<number of miniblocks in a block>` miniblocks are needed to store the values, the bytes storing the bit widths of the unneeded miniblocks are still present; their value should be zero, but readers must accept arbitrary values as well. There are no additional padding bytes for the miniblock bodies though, as if their bit widths were 0 (regardless of the actual byte values). The reader knows when to stop reading by keeping track of the number of values read. 
192 | 
193 | The following examples use 8 as the block size to keep the examples short, but in real cases it would be invalid. 
194 | #### Example 1 
195 | 1, 2, 3, 4, 5 
196 | 
197 | After step 1), we compute the deltas as: 
198 | 
199 | 1, 1, 1, 1 
200 | 
201 | The minimum delta is 1 and after step 2, the deltas become 
202 | 
203 | 0, 0, 0, 0 
204 | 
205 | The final encoded data is: 
206 | 
207 | header: 
208 | 8 (block size), 1 (miniblock count), 5 (value count), 1 (first value) 
209 | 
210 | block 
211 | 1 (minimum delta), 0 (bitwidth), (no data needed for bitwidth 0) 
212 | 
213 | #### Example 2 
214 | 7, 5, 3, 1, 2, 3, 4, 5, the deltas would be 
215 | 
216 | -2, -2, -2, 1, 1, 1, 1 
217 | 
218 | The minimum is -2, so the relative deltas are: 
219 | 
220 | 0, 0, 0, 3, 3, 3, 3 
221 | 
222 | The encoded data is 
223 | 
224 | header: 
225 | 8 (block size), 1 (miniblock count), 8 (value count), 7 (first value) 
226 | 
227 | block 
228 | -2 (minimum delta), 2 (bitwidth), 00000011111111b (0,0,0,3,3,3,3 packed on 2 bits) 
229 | 
230 | #### Characteristics 
231 | This encoding is similar to the [RLE/bit-packing](#RLE) encoding. However the [RLE/bit-packing](#RLE) encoding is specifically used when the range of ints is small over the entire page, as is true of repetition and definition levels. It uses a single bit width for the whole page. 
232 | The delta encoding algorithm described above stores a bit width per miniblock and is less sensitive to variations in the size of encoded integers. It also effectively performs RLE encoding, as a block containing all the same values is bit-packed to a zero bit width and is thus only a header. 
233 | 
234 | ### Delta-length byte array: (DELTA_LENGTH_BYTE_ARRAY = 6) 
235 | 
236 | Supported Types: BYTE_ARRAY 
237 | 
238 | This encoding is always preferred over PLAIN for byte array columns. 
239 | 
240 | For this encoding, we will take all the byte array lengths and encode them using delta 
241 | encoding (DELTA_BINARY_PACKED). The byte array data follows all of the length data just 
242 | concatenated back to back. The expected savings come from the cost of encoding the lengths 
243 | and possibly better compression in the data (it is no longer interleaved with the lengths). 
244 | 
245 | The data stream looks like: 
246 | 
247 | <Delta Encoded Lengths> <Byte Array Data> 
248 | 
249 | For example, if the data was "Hello", "World", "Foobar", "ABCDEF": 
250 | 
251 | The encoded data would be DeltaEncoding(5, 5, 6, 6) "HelloWorldFoobarABCDEF" 
252 | 
253 | ### Delta Strings: (DELTA_BYTE_ARRAY = 7) 
254 | 
255 | Supported Types: BYTE_ARRAY 
256 | 
257 | This is also known as incremental encoding or front compression: for each element in a 
258 | sequence of strings, store the prefix length of the previous entry plus the suffix. 
259 | 
260 | For a longer description, see https://en.wikipedia.org/wiki/Incremental_encoding. 
261 | 
262 | This is stored as a sequence of delta-encoded prefix lengths (DELTA_BINARY_PACKED), followed by 
263 | the suffixes encoded as delta length byte arrays (DELTA_LENGTH_BYTE_ARRAY). 
264 | 
265 | ### Byte Stream Split: (BYTE_STREAM_SPLIT = 9) 
266 | 
267 | Supported Types: FLOAT DOUBLE 
268 | 
269 | This encoding does not reduce the size of the data but can lead to a significantly better 
270 | compression ratio and speed when a compression algorithm is used afterwards. 
271 | 
272 | This encoding creates K byte-streams of length N where K is the size in bytes of the data 
273 | type and N is the number of elements in the data sequence. 
274 | The bytes of each value are scattered to the corresponding streams. The 0-th byte goes to the 
275 | 0-th stream, the 1-st byte goes to the 1-st stream and so on. 
276 | The streams are concatenated in the following order: 0-th stream, 1-st stream, etc. 
277 | 
278 | Example: 
279 | Original data is three 32-bit floats and for simplicity we look at their raw representation. 
280 | ``` 
281 | Element 0 Element 1 Element 2 
282 | Bytes AA BB CC DD 00 11 22 33 A3 B4 C5 D6 
283 | ``` 
284 | After applying the transformation, the data has the following representation: 
285 | ``` 
286 | Bytes AA 00 A3 BB 11 B4 CC 22 C5 DD 33 D6 
287 | ``` 
288 | -------------------------------------------------------------------------------- 
/BloomFilter.md: 
-------------------------------------------------------------------------------- 
1 | 
19 | 
20 | Parquet Bloom Filter 
21 | === 
22 | ### Problem statement 
23 | In their current format, column statistics and dictionaries can be used for predicate 
24 | pushdown. Statistics include minimum and maximum value, which can be used to filter out 
25 | values not in the range. Dictionaries are more specific, and readers can filter out values 
26 | that are between min and max but not in the dictionary. However, when there are too many 
27 | distinct values, writers sometimes choose not to add dictionaries because of the extra 
28 | space they occupy. This leaves columns with large cardinalities and widely separated min 
29 | and max without support for predicate pushdown. 
30 | 
31 | A [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) is a compact data structure that 
32 | overapproximates a set. It can respond to membership queries with either "definitely no" or 
33 | "probably yes", where the probability of false positives is configured when the filter is 
34 | initialized. Bloom filters do not have false negatives. 
35 | 
36 | Because Bloom filters are small compared to dictionaries, they can be used for predicate 
37 | pushdown even in columns with high cardinality and when space is at a premium. 
38 | 
39 | ### Goal 
40 | * Enable predicate pushdown for high-cardinality columns while using less space than 
41 | dictionaries. 
42 | 
43 | * Induce no additional I/O overhead when executing queries on columns without Bloom 
44 | filters attached or when executing non-selective queries. 
45 | 
46 | ### Technical Approach 
47 | 
48 | This section describes split block Bloom filters, which is the first 
49 | (and, at the time of writing, only) Bloom filter representation supported 
50 | in Parquet. 
51 | 
52 | First we will describe a "block". This is the main component split 
53 | block Bloom filters are composed of. 
54 | 
55 | Each block is 256 bits, broken up into eight contiguous "words", each 
56 | consisting of 32 bits. Each word is thought of as an array of bits; 
57 | each bit is either "set" or "not set". 
58 | 
59 | When initialized, a block is "empty", which means each of the eight 
60 | component words has no bits set. In addition to initialization, a 
61 | block supports two other operations: `block_insert` and 
62 | `block_check`. Both take a single unsigned 32-bit integer as input; 
63 | `block_insert` returns no value, but modifies the block, while 
64 | `block_check` returns a boolean. The semantics of `block_check` are 
65 | that it must return `true` if `block_insert` was previously called on 
66 | the block with the same argument, and otherwise it returns `false` 
67 | with high probability. For more details of the probability, see below. 
68 | 
69 | The operations `block_insert` and `block_check` depend on some 
70 | auxiliary artifacts. First, there is a sequence of eight odd unsigned 
71 | 32-bit integer constants called the `salt`. Second, there is a method 
72 | called `mask` that takes as its argument a single unsigned 32-bit 
73 | integer and returns a block in which each word has exactly one bit 
74 | set. 
75 | 
76 | ``` 
77 | unsigned int32 salt[8] = {0x47b6137bU, 0x44974d91U, 0x8824ad5bU, 
78 | 0xa2b7289dU, 0x705495c7U, 0x2df1424bU, 
79 | 0x9efc4947U, 0x5c6bfb31U} 
80 | 
81 | block mask(unsigned int32 x) { 
82 | block result 
83 | for i in [0..7] { 
84 | unsigned int32 y = x * salt[i] 
85 | result.getWord(i).setBit(y >> 27) 
86 | } 
87 | return result 
88 | } 
89 | ``` 
90 | 
91 | Since there are eight words in the block and eight integers in the 
92 | salt, there is a correspondence between them. To set a bit in the nth 
93 | word of the block, `mask` first multiplies its argument by the nth 
94 | integer in the `salt`, keeping only the least significant 32 bits of 
95 | the 64-bit product, then divides that 32-bit unsigned integer by 2 to 
96 | the 27th power, denoted above using the C language's right shift 
97 | operator "`>>`". The resulting integer is between 0 and 31, 
98 | inclusive. That integer is the bit that gets set in the word in the 
99 | block. 
100 | 
101 | From the `mask` operation, `block_insert` is defined as setting every 
102 | bit in the block that was also set in the result from mask. Similarly, 
103 | `block_check` returns `true` when every bit that is set in the result 
104 | of `mask` is also set in the block. 
105 | 
106 | ``` 
107 | void block_insert(block b, unsigned int32 x) { 
108 | block masked = mask(x) 
109 | for i in [0..7] { 
110 | for j in [0..31] { 
111 | if (masked.getWord(i).isSet(j)) { 
112 | b.getWord(i).setBit(j) 
113 | } 
114 | } 
115 | } 
116 | } 
117 | ``` 
118 | 
119 | ``` 
120 | boolean block_check(block b, unsigned int32 x) { 
121 | block masked = mask(x) 
122 | for i in [0..7] { 
123 | for j in [0..31] { 
124 | if (masked.getWord(i).isSet(j)) { 
125 | if (not b.getWord(i).isSet(j)) { 
126 | return false 
127 | } 
128 | } 
129 | } 
130 | } 
131 | return true 
132 | } 
133 | ``` 
134 | 
135 | The reader will note that a block, as defined here, is actually a 
136 | special kind of Bloom filter. Specifically it is a "split" Bloom 
137 | filter, as described in section 2.1 of [Network Applications of Bloom 
138 | Filters: A 
139 | Survey](https://www.eecs.harvard.edu/~michaelm/postscripts/im2005b.pdf). The 
140 | use of multiplication by an odd constant and then shifting right is a 
141 | method of hashing integers as described in section 2.2 of 
142 | Dietzfelbinger et al.'s [A reliable randomized algorithm for the 
143 | closest-pair 
144 | problem](http://hjemmesider.diku.dk/~jyrki/Paper/CP-11.4.1997.pdf). 
145 | 
146 | This closes the definition of a block and the operations on it. 
147 | 
148 | Now that a block is defined, we can describe Parquet's split block 
149 | Bloom filters. A split block Bloom filter (henceforth "SBBF") is 
150 | composed of `z` blocks, where `z` is greater than or equal to one and 
151 | less than 2 to the 31st power. When an SBBF is initialized, each block 
152 | in it is initialized, which means each bit in each word in each block 
153 | in the SBBF is unset. 
154 | 
155 | In addition to initialization, an SBBF supports an operation called 
156 | `filter_insert` and one called `filter_check`. Each takes as an 
157 | argument a 64-bit unsigned integer; `filter_check` returns a boolean 
158 | and `filter_insert` does not return a value, but does modify the SBBF. 
159 | 
160 | The `filter_insert` operation first uses the most significant 32 bits 
161 | of its argument to select a block to operate on. Call the argument 
162 | "`h`", and recall the use of "`z`" to mean the number of blocks. Then 
163 | a block number `i` between `0` and `z-1` (inclusive) to operate on is 
164 | chosen as follows: 
165 | 
166 | ```c 
167 | unsigned int64 h_top_bits = h >> 32; 
168 | unsigned int64 z_as_64_bit = z; 
169 | unsigned int32 i = (h_top_bits * z_as_64_bit) >> 32; 
170 | ``` 
171 | 
172 | The first line extracts the most significant 32 bits from `h` and 
173 | assigns them to a 64-bit unsigned integer. The second line is 
174 | simpler: it just sets an unsigned 64-bit value to the same value as 
175 | the 32-bit unsigned value `z`. The purpose of having both `h_top_bits` 
176 | and `z_as_64_bit` be 64-bit values is so that their product is a 
177 | 64-bit value. That product is taken in the third line, and then the 
178 | most significant 32 bits are extracted into the value `i`, which is 
179 | the index of the block that will be operated on. 
180 | 
181 | 
182 | After this process to select `i`, `filter_insert` uses the least 
183 | significant 32 bits of `h` as the argument to `block_insert` called on 
184 | block `i`. 
185 | 
186 | The technique for converting the most significant 32 bits to an 
187 | integer between `0` and `z-1` (inclusive) avoids using the modulo 
188 | operation, which is often very slow. This trick can be found in 
189 | [Kenneth A. 
Ross's 2006 IBM research report, "Efficient Hash Probes on 
190 | Modern Processors"]( 
191 | https://domino.research.ibm.com/library/cyberdig.nsf/papers/DF54E3545C82E8A585257222006FD9A2/$File/rc24100.pdf) 
192 | 
193 | The `filter_check` operation uses the same method as `filter_insert` 
194 | to select a block to operate on, then uses the least significant 32 
195 | bits of its argument as an argument to `block_check` called on that 
196 | block, returning the result. 
197 | 
198 | In the pseudocode below, the modulus operator is represented with the C 
199 | language's "`%`" operator. The "`>>`" operator is used to denote the 
200 | conversion of an unsigned 64-bit integer to an unsigned 32-bit integer 
201 | containing only the most significant 32 bits, and C's cast operator 
202 | "`(unsigned int32)`" is used to denote the conversion of an unsigned 
203 | 64-bit integer to an unsigned 32-bit integer containing only the least 
204 | significant 32 bits. 
205 | 
206 | ``` 
207 | void filter_insert(SBBF filter, unsigned int64 x) { 
208 | unsigned int64 i = ((x >> 32) * filter.numberOfBlocks()) >> 32; 
209 | block b = filter.getBlock(i); 
210 | block_insert(b, (unsigned int32)x) 
211 | } 
212 | ``` 
213 | 
214 | ``` 
215 | boolean filter_check(SBBF filter, unsigned int64 x) { 
216 | unsigned int64 i = ((x >> 32) * filter.numberOfBlocks()) >> 32; 
217 | block b = filter.getBlock(i); 
218 | return block_check(b, (unsigned int32)x) 
219 | } 
220 | ``` 
221 | 
222 | The use of blocks is from Putze et al.'s [Cache-, Hash- and 
223 | Space-Efficient Bloom 
224 | filters](http://algo2.iti.kit.edu/documents/cacheefficientbloomfilters-jea.pdf) 
225 | 
226 | To use an SBBF for values of arbitrary Parquet types, we apply a hash 
227 | function to that value - at the time of writing, 
228 | [xxHash](https://cyan4973.github.io/xxHash/), using the function XXH64 
229 | with a seed of 0 and [following the specification version 
230 | 0.1.1](https://github.com/Cyan4973/xxHash/blob/v0.7.0/doc/xxhash_spec.md). 
231 | 
232 | #### Sizing an SBBF 
233 | 
234 | The `check` operation in SBBFs can return `true` for an argument that 
235 | was never inserted into the SBBF. These are called "false 
236 | positives". The "false positive probability" is the probability that 
237 | any given hash value that was never `insert`ed into the SBBF will 
238 | cause `check` to return `true` (a false positive). There is not a 
239 | simple closed-form calculation of this probability, but here is an 
240 | example: 
241 | 
242 | A filter that uses 1024 blocks and has had 26,214 hash values 
243 | `insert`ed will have a false positive probability of around 1.26%. Each 
244 | of those 1024 blocks occupies 256 bits of space, so the total space 
245 | usage is 262,144 bits. That means that the ratio of bits of space to hash 
246 | values is 10-to-1. Adding more hash values increases the denominator 
247 | and lowers the ratio, which increases the false positive 
248 | probability. For instance, inserting twice as many hash values 
249 | (52,428) decreases the ratio of bits of space per hash value inserted 
250 | to 5-to-1 and increases the false positive probability to 
251 | 18%. Inserting half as many hash values (13,107) increases the ratio 
252 | of bits of space per hash value inserted to 20-to-1 and decreases the 
253 | false positive probability to 0.04%. 
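The pseudocode above translates almost directly into code. The following Python sketch — illustrative only; a real implementation would first hash the column value with XXH64 as described above — represents the filter as `z` blocks of eight 32-bit words:

```python
# A compact sketch of the SBBF operations; names are made up for this example.
SALT = [0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d,
        0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31]

def mask(x):
    # One bit set per word: multiply by the salt, keep 32 bits, take the top 5 bits.
    return [1 << (((x * SALT[i]) & 0xFFFFFFFF) >> 27) for i in range(8)]

def filter_insert(blocks, h):
    i = ((h >> 32) * len(blocks)) >> 32          # block index without modulo
    m = mask(h & 0xFFFFFFFF)                     # least significant 32 bits
    blocks[i] = [w | b for w, b in zip(blocks[i], m)]

def filter_check(blocks, h):
    i = ((h >> 32) * len(blocks)) >> 32
    m = mask(h & 0xFFFFFFFF)
    return all((w & b) == b for w, b in zip(blocks[i], m))

blocks = [[0] * 8 for _ in range(1024)]          # 1024 blocks = 262,144 bits
filter_insert(blocks, 0x1234567890ABCDEF)
assert filter_check(blocks, 0x1234567890ABCDEF)  # no false negatives
```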
254 | 
255 | Here are some sample values of the ratios needed to achieve certain 
256 | false positive rates: 
257 | 
258 | | Bits of space per `insert` | False positive probability | 
259 | | -------------------------- | -------------------------- | 
260 | | 6.0 | 10 % | 
261 | | 10.5 | 1 % | 
262 | | 16.9 | 0.1 % | 
263 | | 26.4 | 0.01 % | 
264 | | 41 | 0.001 % | 
265 | 
266 | #### File Format 
267 | 
268 | Each multi-block Bloom filter is required to work for only one column chunk. The data of a multi-block 
269 | Bloom filter consists of the Bloom filter header followed by the Bloom filter bitset. The Bloom filter 
270 | header encodes the size of the bitset in bytes, which is used to read the bitset. 
271 | 
272 | Here are the Bloom filter definitions in thrift: 
273 | 
274 | 
275 | ``` 
276 | /** Block-based algorithm type annotation. **/ 
277 | struct SplitBlockAlgorithm {} 
278 | /** The algorithm used in Bloom filter. **/ 
279 | union BloomFilterAlgorithm { 
280 | /** Block-based Bloom filter. **/ 
281 | 1: SplitBlockAlgorithm BLOCK; 
282 | } 
283 | 
284 | /** Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash 
285 | * algorithm. It uses the 64-bit version of xxHash. 
286 | **/ 
287 | struct XxHash {} 
288 | 
289 | /** 
290 | * The hash function used in Bloom filter. This function takes the hash of a column value 
291 | * using plain encoding. 
292 | **/ 
293 | union BloomFilterHash { 
294 | /** xxHash Strategy. **/ 
295 | 1: XxHash XXHASH; 
296 | } 
297 | 
298 | /** 
299 | * The compression used in the Bloom filter. 
300 | **/ 
301 | struct Uncompressed {} 
302 | union BloomFilterCompression { 
303 | 1: Uncompressed UNCOMPRESSED; 
304 | } 
305 | 
306 | /** 
307 | * Bloom filter header is stored at beginning of Bloom filter data of each column 
308 | * and followed by its bitset. 
309 | **/ 
310 | struct BloomFilterPageHeader { 
311 | /** The size of bitset in bytes **/ 
312 | 1: required i32 numBytes; 
313 | /** The algorithm for setting bits. **/ 
314 | 2: required BloomFilterAlgorithm algorithm; 
315 | /** The hash function used for Bloom filter. **/ 
316 | 3: required BloomFilterHash hash; 
317 | /** The compression used in the Bloom filter **/ 
318 | 4: required BloomFilterCompression compression; 
319 | } 
320 | 
321 | struct ColumnMetaData { 
322 | ... 
323 | /** Byte offset from beginning of file to Bloom filter data. **/ 
324 | 14: optional i64 bloom_filter_offset; 
325 | } 
326 | 
327 | ``` 
328 | 
329 | The Bloom filters are grouped by row group, with data for each column in the same order as the file schema. 
330 | The Bloom filter data can be stored before the page indexes, after all row groups. The file layout looks like: 
331 | ![File Layout - Bloom filter footer](doc/images/FileLayoutBloomFilter2.png) 
332 | 
333 | Or it can be stored between row groups; in that case the file layout looks like: 
334 | ![File Layout - Bloom filter between row groups](doc/images/FileLayoutBloomFilter1.png) 
335 | 
336 | #### Encryption 
337 | In the case of columns with sensitive data, the Bloom filter exposes a subset of sensitive 
338 | information such as the presence of a value. Therefore the Bloom filters of columns with sensitive 
339 | data should be encrypted with the column key, while the Bloom filters of other (non-sensitive) columns 
340 | do not need to be encrypted. 
341 | 
342 | Bloom filters have two serializable modules - the PageHeader thrift structure (with its internal 
343 | fields, including the BloomFilterPageHeader `bloom_filter_page_header`), and the Bitset. 
The header 
344 | structure is serialized by Thrift, and written to the file output stream; it is followed by the 
345 | serialized Bitset. 
346 | 
347 | For Bloom filters in sensitive columns, each of the two modules will be encrypted after 
348 | serialization, and then written to the file. The encryption will be performed using the AES GCM 
349 | cipher, with the same column key, but with different AAD module types - "BloomFilter Header" (8) 
350 | and "BloomFilter Bitset" (9). The length of the encrypted buffer is written before the buffer, as 
351 | described in the Parquet encryption specification. 
352 | -------------------------------------------------------------------------------- 
/dev/merge_parquet_pr.py: 
-------------------------------------------------------------------------------- 
1 | #!/usr/bin/env python 
2 | 
3 | # 
4 | # Licensed to the Apache Software Foundation (ASF) under one or more 
5 | # contributor license agreements. See the NOTICE file distributed with 
6 | # this work for additional information regarding copyright ownership. 
7 | # The ASF licenses this file to You under the Apache License, Version 2.0 
8 | # (the "License"); you may not use this file except in compliance with 
9 | # the License. You may obtain a copy of the License at 
10 | # 
11 | # http://www.apache.org/licenses/LICENSE-2.0 
12 | # 
13 | # Unless required by applicable law or agreed to in writing, software 
14 | # distributed under the License is distributed on an "AS IS" BASIS, 
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | # See the License for the specific language governing permissions and 
17 | # limitations under the License. 
18 | # 
19 | 
20 | # Utility for creating well-formed pull request merges and pushing them to Apache. 
21 | # usage: ./apache-pr-merge.py (see config env vars below) 
22 | # 
23 | # This utility assumes you already have a local Parquet git folder and that you 
24 | # have added remotes corresponding to both (i) the github apache Parquet 
25 | # mirror and (ii) the apache git repo. 
26 | 
27 | import json 
28 | import os 
29 | import re 
30 | import subprocess 
31 | import sys 
32 | import tempfile 
33 | import urllib2 
34 | import getpass 
35 | 
36 | try: 
37 | import jira.client 
38 | JIRA_IMPORTED = True 
39 | except ImportError: 
40 | JIRA_IMPORTED = False 
41 | 
42 | # Location of your Parquet git development area 
43 | PARQUET_HOME = os.path.abspath(__file__).rsplit("/", 2)[0] 
44 | PROJECT_NAME = PARQUET_HOME.rsplit("/", 1)[1] 
45 | print "PARQUET_HOME = " + PARQUET_HOME 
46 | print "PROJECT_NAME = " + PROJECT_NAME 
47 | 
48 | def lines_from_cmd(cmd): 
49 | return subprocess.check_output(cmd.split(" ")).strip().split("\n") 
50 | 
51 | # Remote name which points to the GitHub site 
52 | PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME") 
53 | available_remotes = lines_from_cmd("git remote") 
54 | if PR_REMOTE_NAME is not None: 
55 | if PR_REMOTE_NAME not in available_remotes: 
56 | print "ERROR: git remote '%s' is not defined."
% PR_REMOTE_NAME 57 | sys.exit(-1) 58 | else: 59 | remote_candidates = ["github-apache", "apache-github"] 60 | # Get first available remote from the list of candidates 61 | PR_REMOTE_NAME = next((remote for remote in available_remotes if remote in remote_candidates), None) 62 | 63 | # Remote name which points to Apache git 64 | PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "apache") 65 | # ASF JIRA username 66 | JIRA_USERNAME = os.environ.get("JIRA_USERNAME") 67 | # ASF JIRA password 68 | JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD") 69 | 70 | GITHUB_BASE = "https://github.com/apache/" + PROJECT_NAME + "/pull" 71 | GITHUB_API_BASE = "https://api.github.com/repos/apache/" + PROJECT_NAME 72 | JIRA_BASE = "https://issues.apache.org/jira/browse" 73 | JIRA_API_BASE = "https://issues.apache.org/jira" 74 | # Prefix added to temporary branches 75 | BRANCH_PREFIX = "PR_TOOL" 76 | 77 | os.chdir(PARQUET_HOME) 78 | 79 | 80 | def get_json(url): 81 | try: 82 | return json.load(urllib2.urlopen(url)) 83 | except urllib2.HTTPError as e: 84 | print "Unable to fetch URL, exiting: %s" % url 85 | sys.exit(-1) 86 | 87 | 88 | def fail(msg): 89 | print msg 90 | clean_up() 91 | sys.exit(-1) 92 | 93 | 94 | def run_cmd(cmd): 95 | try: 96 | if isinstance(cmd, list): 97 | return subprocess.check_output(cmd) 98 | else: 99 | return subprocess.check_output(cmd.split(" ")) 100 | except subprocess.CalledProcessError as e: 101 | # this avoids hiding the stdout / stderr of failed processes 102 | print 'Command failed: %s' % cmd 103 | print 'With output:' 104 | print '--------------' 105 | print e.output 106 | print '--------------' 107 | raise e 108 | 109 | def continue_maybe(prompt): 110 | result = raw_input("\n%s (y/n): " % prompt) 111 | if result.lower() != "y": 112 | fail("Okay, exiting") 113 | 114 | 115 | original_head = run_cmd("git rev-parse HEAD")[:8] 116 | 117 | 118 | def clean_up(): 119 | print "Restoring head pointer to %s" % original_head 120 | run_cmd("git checkout %s" % original_head) 121 | 122 | branches = run_cmd("git branch").replace(" ", "").split("\n") 123 | 124 | for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches): 125 | print "Deleting local branch %s" % branch 126 | run_cmd("git branch -D %s" % branch) 127 | 128 | 129 | # merge the requested PR and return the merge hash 130 | def merge_pr(pr_num, target_ref): 131 | pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num) 132 | target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper()) 133 | run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name)) 134 | run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, target_ref, target_branch_name)) 135 | run_cmd("git checkout %s" % target_branch_name) 136 | 137 | had_conflicts = False 138 | try: 139 | run_cmd(['git', 'merge', pr_branch_name, '--squash']) 140 | except Exception as e: 141 | msg = "Error merging: %s\nWould you like to manually fix-up this merge?" % e 142 | continue_maybe(msg) 143 | msg = "Okay, please fix any conflicts and 'git add' conflicting files... Finished?" 
144 | continue_maybe(msg) 
145 | had_conflicts = True 
146 | 
147 | commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, 
148 | '--pretty=format:%an <%ae>']).split("\n") 
149 | distinct_authors = sorted(set(commit_authors), 
150 | key=lambda x: commit_authors.count(x), reverse=True) 
151 | primary_author = distinct_authors[0] 
152 | commits = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, 
153 | '--pretty=format:%h [%an] %s']).split("\n\n") 
154 | 
155 | merge_message_flags = [] 
156 | 
157 | merge_message_flags += ["-m", title] 
158 | if body != None: 
159 | merge_message_flags += ["-m", body] 
160 | 
161 | authors = "\n".join(["Author: %s" % a for a in distinct_authors]) 
162 | 
163 | merge_message_flags += ["-m", authors] 
164 | 
165 | if had_conflicts: 
166 | committer_name = run_cmd("git config --get user.name").strip() 
167 | committer_email = run_cmd("git config --get user.email").strip() 
168 | message = "This patch had conflicts when merged, resolved by\nCommitter: %s <%s>" % ( 
169 | committer_name, committer_email) 
170 | merge_message_flags += ["-m", message] 
171 | 
172 | # The string "Closes #%s" is required for GitHub to correctly close the PR 
173 | merge_message_flags += [ 
174 | "-m", 
175 | "Closes #%s from %s and squashes the following commits:" % (pr_num, pr_repo_desc)] 
176 | for c in commits: 
177 | merge_message_flags += ["-m", c] 
178 | 
179 | run_cmd(['git', 'commit', '--author="%s"' % primary_author] + merge_message_flags) 
180 | 
181 | continue_maybe("Merge complete (local ref %s). Push to %s?" % ( 
182 | target_branch_name, PUSH_REMOTE_NAME)) 
183 | 
184 | try: 
185 | run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, target_branch_name, target_ref)) 
186 | except Exception as e: 
187 | clean_up() 
188 | fail("Exception while pushing: %s" % e) 
189 | 
190 | merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8] 
191 | clean_up() 
192 | print("Pull request #%s merged!" % pr_num) 
193 | print("Merge hash: %s" % merge_hash) 
194 | return merge_hash 
195 | 
196 | 
197 | def cherry_pick(pr_num, merge_hash, default_branch): 
198 | pick_ref = raw_input("Enter a branch name [%s]: " % default_branch) 
199 | if pick_ref == "": 
200 | pick_ref = default_branch 
201 | 
202 | pick_branch_name = "%s_PICK_PR_%s_%s" % (BRANCH_PREFIX, pr_num, pick_ref.upper()) 
203 | 
204 | run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, pick_ref, pick_branch_name)) 
205 | run_cmd("git checkout %s" % pick_branch_name) 
206 | run_cmd("git cherry-pick -sx %s" % merge_hash) 
207 | 
208 | continue_maybe("Pick complete (local ref %s). Push to %s?" % ( 
209 | pick_branch_name, PUSH_REMOTE_NAME)) 
210 | 
211 | try: 
212 | run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, pick_branch_name, pick_ref)) 
213 | except Exception as e: 
214 | clean_up() 
215 | fail("Exception while pushing: %s" % e) 
216 | 
217 | pick_hash = run_cmd("git rev-parse %s" % pick_branch_name)[:8] 
218 | clean_up() 
219 | 
220 | print("Pull request #%s picked into %s!"
% (pr_num, pick_ref)) 
221 | print("Pick hash: %s" % pick_hash) 
222 | return pick_ref 
223 | 
224 | 
225 | def fix_version_from_branch(branch, versions): 
226 | # Note: Assumes this is a sorted (newest->oldest) list of un-released versions 
227 | if branch == "master": 
228 | return versions[0] 
229 | else: 
230 | branch_ver = branch.replace("branch-", "") 
231 | return filter(lambda x: x.name.startswith(branch_ver), versions)[-1] 
232 | 
233 | def extract_jira_id(title): 
234 | m = re.search(r'^(PARQUET-[0-9]+)\b.*$', title, re.IGNORECASE) 
235 | if m: # the pattern has exactly one group, so a match implies group(1) exists 
236 | return m.group(1).upper() 
237 | else: 
238 | fail("PR title should be prefixed by a jira id \"PARQUET-XXX: ...\", found: \"%s\"" % title) 
239 | 
240 | def check_jira(title): 
241 | jira_id = extract_jira_id(title) 
242 | asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, 
243 | basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) 
244 | try: 
245 | issue = asf_jira.issue(jira_id) 
246 | except Exception as e: 
247 | fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) 
248 | 
249 | def resolve_jira(title, merge_branches, comment): 
250 | asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, 
251 | basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) 
252 | 
253 | default_jira_id = extract_jira_id(title) 
254 | 
255 | jira_id = raw_input("Enter a JIRA id [%s]: " % default_jira_id) 
256 | if jira_id == "": 
257 | jira_id = default_jira_id 
258 | 
259 | try: 
260 | issue = asf_jira.issue(jira_id) 
261 | except Exception as e: 
262 | fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) 
263 | 
264 | cur_status = issue.fields.status.name 
265 | cur_summary = issue.fields.summary 
266 | cur_assignee = issue.fields.assignee 
267 | if cur_assignee is None: 
268 | cur_assignee = "NOT ASSIGNED!!!" 
269 | else: 
270 | cur_assignee = cur_assignee.displayName 
271 | 
272 | if cur_status == "Resolved" or cur_status == "Closed": 
273 | fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status)) 
274 | print ("=== JIRA %s ===" % jira_id) 
275 | print ("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" % ( 
276 | cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id)) 
277 | 
278 | versions = asf_jira.project_versions("PARQUET") 
279 | versions = sorted(versions, key=lambda x: x.name, reverse=True) 
280 | versions = filter(lambda x: x.raw['released'] is False, versions) 
281 | 
282 | default_fix_versions = map(lambda x: fix_version_from_branch(x, versions).name, merge_branches) 
283 | for v in default_fix_versions: 
284 | # Handles the case where we have forked a release branch but not yet made the release. 
285 | # In this case, if the PR is committed to the master branch and the release branch, we 
286 | # only consider the release branch to be the fix version. E.g. it is not valid to have 
287 | # both 1.1.0 and 1.0.0 as fix versions. 
288 | (major, minor, patch) = v.split(".") 
289 | if patch == "0": 
290 | previous = "%s.%s.%s" % (major, int(minor) - 1, 0) 
291 | if previous in default_fix_versions: 
292 | default_fix_versions = filter(lambda x: x != v, default_fix_versions) 
293 | default_fix_versions = ",".join(default_fix_versions) 
294 | 
295 | fix_versions = raw_input("Enter comma-separated fix version(s) [%s]: " % default_fix_versions) 
296 | if fix_versions == "": 
297 | fix_versions = default_fix_versions 
298 | fix_versions = fix_versions.replace(" ", "").split(",") 
299 | 
300 | def get_version_json(version_str): 
301 | return filter(lambda v: v.name == version_str, versions)[0].raw 
302 | 
303 | jira_fix_versions = map(lambda v: get_version_json(v), fix_versions) 
304 | 
305 | resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0] 
306 | asf_jira.transition_issue( 
307 | jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment) 
308 | 
309 | print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) 
310 | 
311 | if JIRA_IMPORTED: 
312 | jira_login_accepted = False 
313 | while not jira_login_accepted: 
314 | if JIRA_USERNAME: 
315 | print "JIRA username: %s" % JIRA_USERNAME 
316 | else: 
317 | JIRA_USERNAME = raw_input("Enter JIRA username: ") 
318 | 
319 | if not JIRA_PASSWORD: 
320 | JIRA_PASSWORD = getpass.getpass("Enter JIRA password: ") 
321 | 
322 | try: 
323 | asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, 
324 | basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) 
325 | jira_login_accepted = True 
326 | except Exception as e: 
327 | print "\nJIRA login failed, try again\n" 
328 | JIRA_USERNAME = None 
329 | JIRA_PASSWORD = None 
330 | else: 
331 | print "WARNING: Could not find jira python library. Run 'sudo pip install jira' to install." 
332 | print "The tool will continue to run but won't handle the JIRA." 
333 | print 
334 | 
335 | branches = get_json("%s/branches" % GITHUB_API_BASE) 
336 | branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) 
337 | # Assumes branch names can be sorted lexicographically 
338 | # Julien: we don't have any "branch-*" branch yet, so fall back to "master" when none exist 
339 | latest_branch = sorted(branch_names, reverse=True)[0] if branch_names else "master" 
340 | 
341 | pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ") 
342 | pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) 
343 | 
344 | url = pr["url"] 
345 | title = pr["title"] 
346 | if JIRA_IMPORTED: 
347 | check_jira(title) 
348 | body = pr["body"] 
349 | target_ref = pr["base"]["ref"] 
350 | user_login = pr["user"]["login"] 
351 | base_ref = pr["head"]["ref"] 
352 | pr_repo_desc = "%s/%s" % (user_login, base_ref) 
353 | 
354 | if pr["merged"] is True: 
355 | print "Pull request %s has already been merged, assuming you want to backport" % pr_num 
356 | merge_commit_desc = run_cmd([ 
357 | 'git', 'log', '--merges', '--first-parent', 
358 | '--grep=pull request #%s' % pr_num, '--oneline']).split("\n")[0] 
359 | if merge_commit_desc == "": 
360 | fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num) 
361 | 
362 | merge_hash = merge_commit_desc[:7] 
363 | message = merge_commit_desc[8:] 
364 | 
365 | print "Found: %s" % message 
366 | cherry_pick(pr_num, merge_hash, latest_branch) 
367 | sys.exit(0) 
368 | 
369 | if not bool(pr["mergeable"]): 
370 | msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \ 
371 | "Continue?
(experts only!)" 372 | continue_maybe(msg) 373 | 374 | print ("\n=== Pull Request #%s ===" % pr_num) 375 | print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % ( 376 | title, pr_repo_desc, target_ref, url)) 377 | continue_maybe("Proceed with merging pull request #%s?" % pr_num) 378 | 379 | merged_refs = [target_ref] 380 | 381 | merge_hash = merge_pr(pr_num, target_ref) 382 | 383 | pick_prompt = "Would you like to pick %s into another branch?" % merge_hash 384 | while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y": 385 | merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)] 386 | 387 | if JIRA_IMPORTED: 388 | continue_maybe("Would you like to update the associated JIRA?") 389 | jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num) 390 | resolve_jira(title, merged_refs, jira_comment) 391 | else: 392 | print "WARNING: Could not find jira python library. Run 'sudo pip install jira' to install." 393 | print "Exiting without trying to close the associated JIRA." 394 | -------------------------------------------------------------------------------- /KEYS: -------------------------------------------------------------------------------- 1 | This file contains the PGP keys of various developers. 2 | 3 | Users: pgp < KEYS 4 | gpg --import KEYS 5 | Developers: 6 | pgp -kxa <your name> and append it to this file. 7 | (pgpk -ll <your name> && pgpk -xa <your name>) >> this file. 8 | (gpg --list-sigs --keyid-format long <your name> 9 | && gpg --armor --export <your name>) >> this file. 10 | 11 | pub 2048R/97D7E8647AE7E47B 2013-04-10 [expired: 2017-04-10] 12 | uid Julien Le Dem 13 | sig 3 97D7E8647AE7E47B 2013-04-10 Julien Le Dem 14 | sig FCB3CBD9D3924CCD 2014-09-08 Ryan Blue (CODE SIGNING KEY) 15 | sig 7CD8278971F0F13B 2014-09-08 Tianshuo Deng 16 | 17 | -----BEGIN PGP PUBLIC KEY BLOCK----- 18 | Version: GnuPG v1 19 | 20 | mQENBFFll5kBCACk/tTfHSxUT2W9phkLQzJs6AV4GElqcFo7ZNE1DwAB/gk8uJwR 21 | Po7WYaO2/91hNu4y1SooDRGnqz0FvZzOA8sW/KujK13MMqmGYb1jJdwPjNq6KOK/ 22 | 3EygCxq9DxSS+TILvq3NsFgYGdopdJxRl9zh15Po/3c/jNMPtnGZzP39EsfMhgIS 23 | YwwiEHPVPB00Q0IGRQMhtJqh1AQ5KrxqK4+uEwwu3Sb52DpBjfgffl8GMGKfH/tk 24 | VvJ6L+7rPXtNqho5b7i8379//Bn9xwgO2YCtjPoZMVg37M6f6hVWMr3fFmX/OXgU 25 | UWwLGOTAeuLKWkikFJr5y0rzDaF2qcD9t7wfABEBAAG0IEp1bGllbiBMZSBEZW0g 26 | PGp1bGllbkBsZWRlbS5uZXQ+iQE9BBMBCgAnBQJRZZeZAhsvBQkHhh+ABQsJCAcD 27 | BRUKCQgLBRYCAwEAAh4BAheAAAoJEJfX6GR65+R7au4IAIfZVA9eWBZn9NuaWX7L 28 | Xi+xDtzrfUrsWZxMIP6zkQsIspiX9AThGv3zDn+Tpfw7svV1QfUQX0LHbwMMYqq+ 29 | mRJB/kqYutpLxw7h63zrWR2k2Sdzvole2c3Rfk1vblIdWZk7ArLSivqTk/oGwr7d 30 | MejvOMmKSzqW0vQF6dNbYerLOiqPr4mKqONWm4nOLZEBzjE3IfbK3gNBSFq+92jV 31 | iWY6ozqAxydYafNUSZRrcniYskxd9JCSSLZiIZW3X9lToA/74LjpPbmzvQtkH68D 32 | 0EnC1mkPTKCA4r+CLb3a9GJ9Surg2T0OptyPHsXipgViVryXgopD2odA3fh9SY5l 33 | Ee+JAhwEEAECAAYFAlQN+kQACgkQ/LPL2dOSTM3+OA//dYj9kiZhZNVb6hMfrubn 34 | OjTmY8Hcax8G+aJWxRrGE8HrCUjEJ4NThK523+fmol1PxNWsguljlsZvJ189YPOh 35 | weDJzNmKwhLntq/uBgtJyWBN1v9bUzkR9Ud+UdD1tPbNj7sNiIQE1ZqWMxra3sq/ 36 | gcodVgqSADGgjKO9tenQhWvQXxBR55MOqZbxnyazRPEYS0mkN0A0DwtG82tHNRL7 37 | Z3vs/kG5hoW3kYifCZn5pW3wKtfIY5JH7usYOzA86p7GH4hOfO+dzhDANH+C+u9O 38 | ZRbCdUE8oEp3fAWY9+3VzlO5ixpFOeHGfbSJp44Jv6wUOxNwRmD/gk+DxVrsS/Yn 39 | rLFCZgDHgkFHGJ1D7PnxTy4qtwGasYxWYJOUiaAJbOvRa8nbhan2/wsrgnJTbXAH 40 | +7v5tFfCV77Po//V0fojYZNvbkEO8/yRpQL+uKiVRaRD5dMfHRb31OR0A59ssYX9 41 | 63QpBEof/OeELC0VowG+KCc+4CfSMmAGnQMdEhMAUPz+79nJw7ijeF5C82Z5mQof 42 | v+nf+kdqr80UbG+RoODKtlHFETxJ5STQe6uiPOfvb+EADPA0cZ34u5tD3Z+SMV1k 43 | Gf7Jxi45jmkn9Z9AkVj6KgdDeSjV7EkRiY0pm43Vvd6WvV5t54cgJcwXrjG+h03f 44 | 
65w7F+KBrh7YAcUvrf4JeXKJARwEEAECAAYFAlQN/XwACgkQfNgniXHw8TtU9Af/ 45 | b9CYFtsG9q1ZbnV9SChxjLLUipGsmKTUjCnz7oiZvJJ04e+0np1NQJKJbthGfEDM 46 | eLt1WiYpTDu66zAuLDA7ACcbv3UUXXsUTEfN76J+9DJHrtK1soHGLkKLW2hZeWKp 47 | PKya/HRF4Rv3/aAwWtRjEuQr9pLt/wAOedV6mrpyTngOKQn97tzo/yUeDNG7be8A 48 | xtUStQY/2zJmHkaLeULKOspgUchBQ1S+M4q46dE+tyel47BLyHIECqk/geLOlZmh 49 | lo6TtVgnBSXC5SqMwh5pz/P5ntQ8FVLedGQI9dwVhxbjoo5DNB/6ntfbwkheiak1 50 | CFBm0ZVPJjX7F2XFcq7VCrkBDQRRZZeZAQgA4eixR7xHvnTyF12CYLsnFE8x1tI+ 51 | 78FCjKm0n1YPCzEYa70bnnZmpW4KCwO0flN4RhhP+g2KRCCov2ZH7bxvhTxe4n/j 52 | T6I/+61Fpba4I7qExYqX+tylyjUKhynLcWCbvRQnyjOMTaLbMVrftV+ATVmj7fi0 53 | PdzRW/7QvCSrDsMFtTSaNBdeMbzptpoXAxTgVZOIoHbWOIfovN1uPnFItrmNnKXX 54 | KGyDPX2s2KCz10G1lrw0l9tqDg+BtqE9/xCtqWoZJMnT8jAJZeJ0V37R1jDBDEHK 55 | AfPOUKNYf5GWxJeCWYzL77ve8VdItKwPhtjW7zFKuyrqiBHE40fgTLKvNQARAQAB 56 | iQJEBBgBCgAPBQJRZZeZAhsuBQkHhh+AASkJEJfX6GR65+R7wF0gBBkBCgAGBQJR 57 | ZZeZAAoJECrRWHEDxOERzmEIAOCrfYGPdLyzBn/xAdymx2FaNTS48ybMIGjcu6Od 58 | nKzvgBJObLPQf0+WKhkbQf2HEHYinBVpX8K4dNY9RhzIRbQNhCWY5E5/leI/nQ9O 59 | ZBUMpT8Gw5saj0YtF3By4E9ywxNWiAyX2SAHjPv/lub0PEaUiWWe6s9MaX5fp71C 60 | TupkdElpxucEpVefUaUOSMQ2ecOniCh/9ltPLYcjwnC1ti+Et8/cAK2N554GNE+x 61 | fO3qtGXGUleWhpt3fblTcCyO+odAPKxm70jnABLk8m+KpffcdBYSJ5ai5hPkrnyq 62 | 3NBRDPGlLdtDkzn0/xKYnVbLW1d+d2NFwJzEKncQphHoo0T19wf8DSfym7dIsstj 63 | jwFI8+N/1yCdMD886x8bgmsSsNiD9tro+1083yr+IL5+gUs8Q4ETpsow+IS6sfp2 64 | fzA0TaLBLEOFYy/XFxnzO+YtVNIDAnrDEgTOMahFUrJ/HVZF9xT+kKwhyHaRNIQL 65 | CYc4VoSWldqoDVOGI30NjtVo5EGzf3qVWkTm4yplBhJvJanxrMHuJAWRgFX8D48B 66 | cs/senr8s+O0oXQQYIjz/FkZh/mQFtrgsvnzyUR52SnwEzNMmXjZNkydPZwcY6mu 67 | cqCIvQIvmBpPdlyaoglwJ8wWb76uIE6VFcN71FF3EfV51/yUeQGJaoExWLY6IH8x 68 | Xtn3IWkBWA== 69 | =xpC8 70 | -----END PGP PUBLIC KEY BLOCK----- 71 | pub 2048R/7CD8278971F0F13B 2013-08-26 72 | uid Tianshuo Deng 73 | sig 3 7CD8278971F0F13B 2013-08-26 Tianshuo Deng 74 | sig FCB3CBD9D3924CCD 2014-09-08 Ryan Blue (CODE SIGNING KEY) 75 | sig 97D7E8647AE7E47B 2014-09-08 Julien Le Dem 76 | sub 2048R/F98EFADB0CEDD7ED 2013-08-26 77 | sig 7CD8278971F0F13B 2013-08-26 Tianshuo Deng 78 | 79 | -----BEGIN PGP PUBLIC KEY BLOCK----- 80 | Version: GnuPG v1 81 | 82 | mQENBFIb4SsBCAC/gzo6AHmPnIMGljgTFkBOErICfOrosRqENWVMdY6lJ7yTuKPS 83 | 1zk39SGgVkf6gydcdxRuOr9J8vyI3Fjv30tQF8AjH8HR8KMmZ//CupiBP/7Wa1UW 84 | CM9ZnNqapGDZWtj4wCLFlPiXl2Z3u2+drVWB/zuBC3EisNk/byTy3NfQOR9G+EJI 85 | ehqn8mNr9AwKvoQsXNoWiz0xHgHQRCdL2RHqaVfsx0UgBKcaZ4Fj0PDEt4NSX8ck 86 | KrlQn5Rlr//2diZ0+OTMqI+kG3fCy90W5VLEjEkQQBkQywkNcUTFUJ5cTUsqJcWY 87 | UiMJAat5eGGl0YrcflikxMLujG7qDXhvFH4FABEBAAG0IVRpYW5zaHVvIERlbmcg 88 | PHRkZW5nQHR3aXR0ZXIuY29tPokBOAQTAQIAIgUCUhvhKwIbAwYLCQgHAwIGFQgC 89 | CQoLBBYCAwECHgECF4AACgkQfNgniXHw8TsfrQgAmdsgRTl76k3kqLgxElcIVqnW 90 | o9HbzUzmhqkluFlHz3A5DNqphhu61pL4eKVrDY6HDobuI47n1N31OwXUJWFPyOvr 91 | bjKvz98Q2GTgdXZrFPib/3vr3QxWSCGNf6xDM446B0ezm7TqKNgcTixD5Y9CRTnY 92 | PIz7bLYOLX/KeSbrZhB7u0OEYSuHRiFJpRLgAxrsx7OQPRUHSTXobfGvhDduEVue 93 | i8XMhcvJmSPNFA/YFZxKXI8zeiFm87foemcnVAFat7m846NtSLmRtYS9IBvBMiSY 94 | 5Qi+QKB1aEBLh/UbiKcvUytK9MaPs6E+nNmnbDBQ5cxeHaKk0KEWrdXt+yPK94kC 95 | HAQQAQIABgUCVA38lAAKCRD8s8vZ05JMzaSSEACWjYI6Q+unzZCAcON2CVNcAryY 96 | cyzzWiQkbs1URqCkRMcjIwLHTlxdFtwyjId6JNgQLu8z39JtC6/PUt1wDs/6x4lw 97 | 3PJf9jwHoc5IP3ohp4WCeDuMtipXdVtchZtO2M9osVCU57cWvsvXaXT9JHm/wKqm 98 | ZfziSVcURsNYW1klLtdV25Qd+GOP88aZW+l2F0dfSTYBRihGqH9UwkZ9mxyoAfqv 99 | roIpR5WtUs/fBBDWqk1p3IK2S7/cRDVmdOAyK8Vyw+QMxW9B++zesOS7cCJnWl6w 100 | oRJDcCd1wYlKSPOZDmyGeK98ROlKtxVksaH71MJcUy58NLkhg41ibj43mOqigCdg 101 | WcTYwWuaXf8y9iDhjx8CTARmcHKxhMDyX2P4s2211oTOlflM9y3QO2hx9D48xVrX 102 
| rFwyYtFbqsfPiryp7xlJpPuTP0qVj/Yoiah5v3qZukNUvxdI6g6sKT1OwGtCjmoY 103 | XJN6cOnVUpZt4vzmUg+KtQij5HHinvUmep73vzDGCCLlmb19Ol0LtjxebeLQFYNB 104 | sOfa+IehqRJMEBlkeJ5CVHrG5Am71PMzvbyPE6PPCvZNcbdIVQd/+ZT2hh1qDDU1 105 | RlLyKrpEj9Smn2jFGtEBXv1cBjOVls17vfKP5W20ckMHbktdeOqdp6X8Li68mCvy 106 | p1ZAhHAN9dh1QiX8TIkBHAQQAQoABgUCVA38mwAKCRCX1+hkeufke9iTB/9Qlkvv 107 | FKOm9SVjY1tE63PJRKI6nZkyDbtID81hE+nT5IzlJIB4ZSgXmfJ6jpH51FBdSTwe 108 | 6rDJxX5Sz4DCkcDQeo4hCJDwQqtqiHwpWMdyOVxm6BeujBU0S58B71ap1y7hpAdq 109 | groZztlXSJfHBxPecaQ02/uTthGH/BjplB+b8EUmnJE9DGQn4rRkQ7eBymcZC37v 110 | I+VGIV8LwZ4c5OvkbK9flv4f/vv0xCgEREwHv7QwOFwKddLIyK6YaWl/2cSoItKS 111 | 7yOL6EIGaAwBKCKNKjgT5ZVHeqkBRZ8rxMNAnlgFKtdD6WMMnmuL6eJAxb3+cjdL 112 | PcZZ97Gzr3vmPgWVuQENBFIb4SsBCADT/q58oLQgqiGMCYVw3hpkBhHuRwFKbcOj 113 | KVqCfMRiMuVGp+wZNJZ54fz5IVR7Qf8aQQTTZ0Cj1rWQnXXtAw4vC8LBrYxhHPCZ 114 | yck/uF59ok5GQdkbNWBiz5evdeXysDp2YDwNCz2YM37ghu9YGlN9G043IUwW20S/ 115 | U0L6JnOUhwXSLvs9+FSnOD2rqyZ/dPkEXgJGdiVbxAfB7fZEmUjEi8rUAmIJEbjt 116 | nxh39E2Mhw7FT2YYgCcLBQoPNpe15IzrjM721NWV6fx4WFuS8mCgb3BeG0NjMmRk 117 | H1yrZVndJ0B4sBGPF47C/66gykAolhzv7o3X4G2mZRcqxs2Y7+FJABEBAAGJAR8E 118 | GAECAAkFAlIb4SsCGwwACgkQfNgniXHw8TsP5AgAihrSq+H7TK5i/iqRSfj+ae7B 119 | 54f5LPVMVeqRrDaVnWqK0wGkyPATsspAneSFKQdYefBhhKKuFOOopO4HQUe0k2YH 120 | +4p92OzbmdPuUT97YtCtwF8Kzs/wnRgWOwkoC2GJotfrCO8G3iIJy9TM0QJf1yC3 121 | s1tkVIDXRrm+sz6eGmkPLPi2tWE5RG2vGTRJNVyMU9dqOnLbppJeg4WIpKPqJgAy 122 | woq9HFeHZ5sSVQ56GOwENRvjCeqDOTmbwus0MIymrcs3yHC6O0UEHPlpHzePNLJl 123 | /Otnj+wHn/N9LAoxr7gDpu/cpBFiPSLD189FCbU15FnFVAEuC5Vd9Y/3IhMwvg== 124 | =Gd1/ 125 | -----END PGP PUBLIC KEY BLOCK----- 126 | pub 1024D/4FB955854318F669 2009-06-30 127 | uid Tom White (CODE SIGNING KEY) 128 | sig E22A746A68E327C1 2010-09-23 [User ID not found] 129 | sig DBAF69BEA7239D59 2010-09-23 [User ID not found] 130 | sig E952F459299EB32C 2010-09-25 [User ID not found] 131 | sig 5E43CAB9AEC77EAF 2010-09-27 [User ID not found] 132 | sig 220F69801F27E622 2010-10-27 [User ID not found] 133 | sig 3 4FB955854318F669 2009-06-30 Tom White (CODE SIGNING KEY) 134 | sig 2C89EE98C987200D 2010-10-02 [User ID not found] 135 | sig 1209E7F13D0C92B9 2010-09-24 [User ID not found] 136 | sig FCB3CBD9D3924CCD 2014-09-04 Ryan Blue (CODE SIGNING KEY) 137 | sub 2048g/A306EFF1BAEBF3E3 2009-06-30 138 | sig 4FB955854318F669 2009-06-30 Tom White (CODE SIGNING KEY) 139 | 140 | -----BEGIN PGP PUBLIC KEY BLOCK----- 141 | Version: GnuPG v1 142 | 143 | mQGiBEpJ96MRBACBybwXhgASAa1EcV+TQPpMsBWz1uAuLLWJZsxSfV+Z/CP1PAOd 144 | 6NqSn2vTd9Hfp+TwCzGY/W6JryGg0hrgU28wwzbLxk40RYtQUTGKjKzruieSCrLy 145 | /ATmzKUfv5vRbbIfObhM57LnxN1Wto+w3cxfciMQqEH/JiX3ksIPof1sIwCgkPrB 146 | IM6XrNFFmQDRUS0CjG6iqH8D/1w/1S6CeUewBKBjuxM+ijMMNTRYP225YT9L3U8A 147 | WuQfyZHNY2BLWHxbq1TsQhM1WOYouxjumzwpCV1q/EoOG8XDguUY1aRRwG6+17xR 148 | BPDs196wBLAtaVFGRHAr2misolRK9j1ERo5FxExxNWRIXGbd3bluK9AVM0376WR4 149 | AIhuA/9/91KIgzHXiuJlEBPzx1nA6bUQtO3lxzW7BVwYslTrsCCL281EV9VPIPSE 150 | Lxzul7yAd5KE5z23SauNi0DLbXPR4i0h5XpvkfXsTHBmuyvLbw0x0H6DkDigxsDd 151 | QUfAPTgSogBlKWF/O8xF25XgduabmUNPIp3Si3MPeWRe4NMIb7QyVG9tIFdoaXRl 152 | IChDT0RFIFNJR05JTkcgS0VZKSA8dG9td2hpdGVAYXBhY2hlLm9yZz6IRgQQEQIA 153 | BgUCTJucAAAKCRDiKnRqaOMnwU8zAKCPZZ7Th3OB/REm8t2IqzVdF3OJIgCghwQT 154 | Z/7lNPpOSEB5yEC05X10RPCIRgQQEQIABgUCTJucHAAKCRDbr2m+pyOdWcw6AKCC 155 | u/hhEZ56fY+ntkIQptN8cxnecACfVrYitdHWFhVbiFyKb0kPf4D1hCGIRgQQEQIA 156 | BgUCTJ2TwAAKCRDpUvRZKZ6zLNmeAJ49MZsdDhpM2aS3BOLv45l3+VfGJQCfV0pP 157 | SKiKaWu6hKf1DiGWPzUONgOIRgQQEQIABgUCTKELcAAKCRBeQ8q5rsd+r8OBAKCO 158 | 
AZY1tyJ40AH6ZF0IZpJj2Wwk2ACfQDoTicXpxPlL6eF62F1vag+YQjeIRgQQEQIA 159 | BgUCTMi7EAAKCRAiD2mAHyfmItt8AJ9XgOEO4scVm1HeNcYMlIbNNucrxwCeMaFl 160 | WeMr96oqoyVCWyOimIWhRLeIYAQTEQIAIAUCSkn3owIbAwYLCQgHAwIEFQIIAwQW 161 | AgMBAh4BAheAAAoJEE+5VYVDGPZpTpgAn1WKBiMuTBboxnH+d2VmVJoCwuBtAJ9o 162 | B+Io/Yiew43PWOupTwoaTK8+LIkCHAQQAQIABgUCTKZ86AAKCRAsie6YyYcgDfZR 163 | EACGU9QJuCvgVU5Q3T+Em23ppoNC9KNr/wQhcYQw3bXozOMpm1fhlCOTIDpw21rx 164 | J3/H6E2HJJJOwn/ZqQ9T40T4jXAMskZsF/6s6SXSorzb4wfktnxZKM8B1uJ9EG/P 165 | oZzQ+55UHlInlfEFjA7Gg8ZEygd3Eis+VEerDVkUoA6FPE1vxri/vWE8Jx+r9Gfi 166 | s+fB74HQ44rmMZrwSfTF5B4vmxN1uu1oABptHk0PVwc3mKU+BS6pu0oIDuXIN6WM 167 | S0schn7xBqeVu2GOZ58UQnP74wJs/vsfQRJNhm+Btf5Zqmlj3OhnacAIZCv3Stpq 168 | a/hRT3k6vV+Y6BmrqOco551PB38QJGDIHuWGyWTd8s4e6jKYpUtklpJS6sNO2P5Y 169 | IK70Ds/Ssy0D3oL1kBjhvmTEVJnNwPupZnTSQSfbuGHc5CRh0Uhy7RsVnUtv2UbF 170 | zXbLfkfrVoUV13yh0e11Fu2k/CLfoSGzgzmzZPjdjMD06gBcYTkt8z4d7szZEENN 171 | 0mwCsggjDug3Wmm2k3UTFm6z3rOfn/P5pjqMslxQsBBtTNhxykF0f+xVcOx5PCeF 172 | IcSjtMiJga729bQr7KfpaEtaUIuL/EGyXIG1BFJEHmmR0aGMRYDHdpCDatdc1d5A 173 | WqFT7ZvFB36L48Lz1wSXPEqRm9g1vBFp01A+n/vUwwpESokCHAQQAQoABgUCTJ0r 174 | QQAKCRASCefxPQySuYygEACdyXqPPbwiAl4EBZLkXtEOSp4AhBTUZfYNHZkfrSRs 175 | 8wMmotlULl3aUxZ6tG5OBZJnxLMzjHNv+1q2iY2pxXvk7RPqj0IibPYfw+t095rO 176 | Zgo/iL72cEJ1rNSkrw2Ns5qai/POstTW8FBbxqr5n6PkfyxBOZbn4tgY8aeSAHBc 177 | AijR1x3CZmlbCfdcryoJC9x7E0nNorh9H2FMhqlFejq9YsxC0eBd5kHLYNVOZqXV 178 | 8NlBVbm3C+Mn5PNMTE1/p2XAKV8W4TK81oG+z1Df/KE7OibRv0xo4Z8vwi7VPc8M 179 | CG2oJuyeFXSYq78Zuvp+RqFpdzHhhkSWNFJKC9Yp6v3UfS6Y1ReH3enITqHiO3cD 180 | 4Nf95mHO7HEbLghzj1W8MdlVronoo4U9+Qde/HjRFHzydsIRi7MHCwyb3/DDMMBO 181 | 5Xh30phRqr1BzfkgviMHhW084PSiQmRO65aobLygoDDlKlher9QGg2HtAn05TfRA 182 | UQ7tEcPUVrfzBgfQeY8Lbq7q1h5/J2iYoVQwU8rLWBABFZJ7WzsfyX+hcLUYYzK7 183 | kUacmYGFbbToeC6vYW7omdvpmfHkFBKWu5tOt9ywJHPulQ8oWmsKP4/UUYxZpLUd 184 | Rp5gRuBlVLpH7tHrBjlwCA5K58p8RztcsmXyc7VB1ldq09uhTDziMEjvFhconDq3 185 | k4kCHAQQAQIABgUCVAiV4gAKCRD8s8vZ05JMzQQDEACkK2Y5nLfzozXO4CvS69v5 186 | wiG9MjaGsfNaFTkpdXrgL2mnT0NdKnUXUD3ujrGHyosOgJm5f5qfa0L+yqpDjXzR 187 | /BmIU1VtMUM0+0P69ewSZ4Cao4OSpcOTmehhWjGQnJFeZVJHwgP1TXpGqzxaCQKK 188 | SWQGNFGUzhOIGfCkwY7m7BotoKZ8OKceYqpxF/7DdqMwy6TAP8/Qvzf5OihQ/HJS 189 | 6R91PGmbLNeNg853+l8YQuWHE9yQ4yxSUNgGcu4lltyEoKV/UFHDcDOxCkqHmOGY 190 | rAX4H7csYS6eA2i60iNzwHeqs3QOrBE65/nq7Bydgdo0+4AbrM3oxyQ7LsiS+z7n 191 | azCn1WcVDXieOr0ypqOX2t7YnmbPydBuFw4hFBID8U4DLxIHdI4yMqyaTEW2DE16 192 | L3ayK01cdSkGsBhKIadnw3rxqFAxn4S6p7PRjwz4caozG07lWrF0SWRfRFZlgpN7 193 | E8RJt2aGqwt1/vc/cK8J6l/7aZ6rr+lNgV42z5c7bqFkLwgeJYLQBwulhP46+HwF 194 | s9TBZNUeKFGFZJo9P7AGurB/eF2XBiT8dcxzWFzHTtCsWrNjcZ5qOe09wKZGe3qw 195 | z8Zikjc0gBbwLU5XWzxx7ldL87UZ6jqautJ9vnbGo04fXtctT6B9SA6kzE6RVXDa 196 | GeK2IrV5DCEDY3JP7O2QWbkCDQRKSfejEAgAuZ95vq2njyWx+FGBb8P3NwN4UR1z 197 | ctLEJbaMR1reLRqfW4V387z9F8PuCIeKWnFhGceszqZ8NmmD/BZqxD4kOO2ExXoe 198 | 5LaE4HlpsRzStXURfTV4GAXKcnJzFkbYDOnHksqJD07aLSuVIgNqkOvFDulyDPjb 199 | H17p084nEkoCtSv6fSsFsDTAZYYAgQ/7dmhAtc/XBKd4ybEB7Via30wHDO8WMCgM 200 | 06KtkkMm8rIe9RFdSayt4JGyjEif6X31RiOkW+WAQ956LgriZLqzF8/9+/JJUwsO 201 | xIP6XwEvTEtHiuCic4ARYbteKdkmDQZATP9M7QcEhGtQiSC6XVfs8srl+wADBQf/ 202 | UrKGKOCQQT+ka6CQEmLaFlwJVpODo4cCsR9QzRrcCkDy9Ngks3zvEC9HhfwckWhH 203 | COSEvdMBmamAEz+LH1QSUibKIL9YdGXMERi6PcHdy2mqvkeQUYx2fgdPcnyP8xl1 204 | 7PblQXnxG7xfKXw8hh+B7At3v9gsBd6ihraOp4/BVDp6kRd4vC84TbGW1YfHJfSA 205 | EUmCw+ZsEbVw2gkDOHLYNb7jd2kKZbHaJUJtW3XBBTGRXfyxp9+G4+ilJMjaLtNv 206 | ZFgZQY9MeWJsfbxVGFpOlX9LHutOlTz9NlBbhH1uSgeIN4OkMxXr6Z1KeYiJt/50 207 | yDChOIyfrt/T8ooJQVaI8IhJBBgRAgAJBQJKSfejAhsMAAoJEE+5VYVDGPZpviQA 208 | 
njVeVF9MewkYAYXYwxDQs6J+KIx4AJ9xqFuYD+KbUSGjAUcDyaJPufpZng== 209 | =kUv7 210 | -----END PGP PUBLIC KEY BLOCK----- 211 | pub 4096R/FCB3CBD9D3924CCD 2014-08-13 212 | uid Ryan Blue (CODE SIGNING KEY) 213 | sig 3 FCB3CBD9D3924CCD 2014-08-13 Ryan Blue (CODE SIGNING KEY) 214 | sig 4FB955854318F669 2014-09-04 Tom White (CODE SIGNING KEY) 215 | sig 97D7E8647AE7E47B 2014-09-08 Julien Le Dem 216 | uid Ryan Blue 217 | sig 3 FCB3CBD9D3924CCD 2014-08-13 Ryan Blue (CODE SIGNING KEY) 218 | sig 4FB955854318F669 2014-09-04 Tom White (CODE SIGNING KEY) 219 | sig 97D7E8647AE7E47B 2014-09-08 Julien Le Dem 220 | sub 4096R/F16C5528A8B58800 2014-08-13 221 | sig FCB3CBD9D3924CCD 2014-08-13 Ryan Blue (CODE SIGNING KEY) 222 | sub 4096R/86781D4FA4B2E9B5 2014-08-13 223 | sig FCB3CBD9D3924CCD 2014-08-13 Ryan Blue (CODE SIGNING KEY) 224 | 225 | -----BEGIN PGP PUBLIC KEY BLOCK----- 226 | Version: GnuPG v1 227 | 228 | mQINBFPrjf4BEADXZaq8A3nhpvnoG78DdYXIB26j0su+i6y/meWWNA1QHwMk4e7L 229 | KiEz0YtJcaBM3HkDBYO9tbhbD9VoFf+4YxQ/z0eFrcdIK1vqWjmFjzE1z73px48F 230 | bBmOAx6Av2SSvI5Xoo8VGo7fQVIGlNSz+YjWJLSGfuy2nqLxJMdCOLvD4pAFNHlK 231 | s58IcAyqjTVuqICedGvrHkTQzRd0/ub8Kcxhj/+tU3MBy+DoY427rLUgDX3Lk6iu 232 | uXBlWOFAs9JLJFBBmGoTqzNivljHQwo7yrfpRAVn+1lKgGrDcoTj7cSQPAMMUT6T 233 | y61itg9NB44lkeRq71g+KfKuuH3YFZUNaI+tEc7YDkwxrCcSMPr5Ami/irDfzo7A 234 | oGonU/8DIUiBgcRoxt70H5J3xNj/CT4Vfi59BWTE6RxoCZouoYF8Zgbl1lxPBFLc 235 | NC1kkoMTEi0DeTEg5jwyunNmUiwatMBSDRLIK9laRLkPpGZ9FJpO+VZ0Vwt0GaaA 236 | JVvuCvPzGWn2/Gh93illyJON6wOW58nm5Dk8UOHwxyf+I8M0cz5W+Sjp57lCNRT6 237 | Wl1DcHxrABql2SyvJFXaSrUwJKoo3BJN2zbcYXdkm9HdGr7SOU8qc3mYyf0mzMPv 238 | 0XFZk8U3mdB7R6Dfdl1R3IzudH9HgH+Kk//9ptzTXJ/QIr31jP2eZLgu3QARAQAB 239 | tC5SeWFuIEJsdWUgKENPREUgU0lHTklORyBLRVkpIDxibHVlQGFwYWNoZS5vcmc+ 240 | iQJTBBMBAgA9AhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAIZAQUCU+usiBcY 241 | aHR0cDovL3BncGtleXMubWl0LmVkdQAKCRD8s8vZ05JMzecaEACGGrbjH6844WbO 242 | D2D1M/WNFNMP7JtDX4DCEhwf3/ca88jO7BY/vXvbxY5QApVuJBS5OHGvEHxrQrx6 243 | dV2yVJK/ZL0MpXdieoSDhJyEkfQYy4wDMqFfXEtMBZzo4vK7OmZMdc0FwmAvvCNH 244 | /RJJfShg5X4bA9EA7hdMxOd9TbhaBfi9JjfO/w9gwcF/bQjpyV3lHuB1+SLZDZee 245 | uAKdDYb4jjXA0hxsYa7xAd2Ou+fRnwhJzXUmboERVrFsWV1w3mqHlE/dUuyd3XjR 246 | gqNN5wsctAqBFZlHNsxaT0ao3NJuoBeWxKsJj6l2h2OG1dqeEb6soWYKvGhHV8cJ 247 | 4UiaDe6oVT5BgCf051BgNnvrqHWQy7rJRP4/kLsSwdQvAk+Y6I66n0mWuTYSLFeR 248 | +ZfyTjWPoHx+1UQ5N/5XaZoYG3icwG5kDZafEP8smkL53XZjbbI5wWXF4km+8/JI 249 | RvjzCY6FLdzHKDrPbHpl7KK7UOJ+lDyEJjtSlZmzlaiArNPpANPkreEvKkLLjLeS 250 | ElmCEt1Rk+29Mc5mF0cbmq3r3KpAW48oj8FZDn1iAoM/1Nr0hcgQ5k3a5ndLajYp 251 | TG9rrlD9rrw4UB6lbN9VeGt0YjFUj948TNHOVzrM8MosivXhoC62wXW/REkYUSQ7 252 | yjSh7ZeBbhySonhcj0fY6wdRpBcfVYhGBBARAgAGBQJUCJUaAAoJEE+5VYVDGPZp 253 | bhYAni9sjfrVJ4c9F+3B8lKn6eQmU/jYAJ0UuEXj+RUZWKfSxFXFvuw1HXrRkokB 254 | HAQQAQoABgUCVA37AAAKCRCX1+hkeufke9EJB/9hndGX3N3NIM2MUbOGqIc4YPPx 255 | Kzn9jYMLnKMQO9gDN+MILpGZslbGYQBgdF9ci+0q2N/oOw8cSP1z4AKSufT71sdh 256 | cERpn63TjqNApTmX05Hs2Bo/nJsGQfVwkMKZmFU1qxKAo4bO3ncVJcM2wzDcZJEj 257 | NhpA9MrGk06oToay9FI1qELd4zM3tUTwEj4VwwfuQkynmTQYTAsAKvV0urPVxF94 258 | OL9S1hnV1LS9Mol+ueXC+TQwE6wWXvcnjflERJ2rFHf0QnmCLIphuV58DNbr1Ex9 259 | IUWr59VUn8S2C6F4N3VOIJW/IPi+iMwQ12ZEvuODMxv8nQAT7Rzhh0ivmhcNtBtS 260 | eWFuIEJsdWUgPGJsdWVAYXBhY2hlLm9yZz6JAk8EEwECADkCGwMCHgECF4AFCwkI 261 | BwMFFQoJCAsFFgIDAQAFAlPrrIgXGGh0dHA6Ly9wZ3BrZXlzLm1pdC5lZHUACgkQ 262 | /LPL2dOSTM0WVxAAumwSOTDrn/l6uBQ7dasCdre+qglioVV1ant8b9BDC4+zLH7p 263 | uMgcAZx39H20enT0bKjo/GL0Svfu5bNCONWqLLri16QwmA0uoWJ0+eQ1IytIT4UM 264 | 2NqS+xkNLu0Rdho0zvfIyU1c7eCqmUu+zjSxO7aQF4Q4Ndp5mvXU3HS449uwoNbA 265 | 
IiT9qG8tucyUnE1VmAj0TN94AIQ+Z1L1+1G5Zd13VmPgpoPQ2ygmpTBBNmwXC9o6 266 | qBaleHuMFN/LnAi4VYKcESx13QA1p/bdVFJqkXP1B0Z9zC7YvWkuVq4/oXbnpqTs 267 | lg3PWs5RlGT1uxWiplHjVQVVgD34cipZkzfEYuXKvRIMamoFh3QQc2EHIvZM5a6I 268 | i2afnRCaFjOWmWeGALGaaY6FznylrV+hr7tX96FU8W1e877PVJ60LF05da4MSwMQ 269 | 5LbYqUj8fcY+Qr+gMWVhstPAtqg+ziutX+A4EclEFcDS5Z8gv+vBp0AqMztoBiXB 270 | KVyIqmCSbFCTeJGJkqr2nRamo7ypF2OOFENG9LKCdDXxDOrWKCNAL4tnv04TXzV3 271 | 2tCqk99sWMtZftF5aTb41l/LzB8Q6NvRaiqLoK7lj7lVzoTuKXaKViWCE910Jc7m 272 | src3SKrpDuYZHfU9CJteSmrIE/4S/tFUHk2NVx8qTy3IZ7VPLN96E1kQ2CSIRgQQ 273 | EQIABgUCVAiVGgAKCRBPuVWFQxj2aUEzAJ0feVLNnNP+a/qcZ+HX0y1DKHdx4ACg 274 | kAbAFIiR23s8NM5+er65tbRk0vOJARwEEAEKAAYFAlQN+wUACgkQl9foZHrn5HvA 275 | XAf/V75kZpT3ggwvGEI6J2FCgVWuUeK6wN5ax1sH9wzufvZ68XG1sJa0P3537ZHW 276 | KdELAf+JD2XuvFRhWLKyLiw2qQ95jlLB4AJVdR1pRCMZTz42uDd8/AROmtLghEkm 277 | Hd1yjMvmmP8nhihqG9tHfw5f0QDoAiADgk9d3v7gD3E9g4bfZPJxqrCmexwzCyHv 278 | lJa+WRgaaU27sG7pCCWch9CUrAHV1SZMOhh5rAfcaAzRIxuAv1WFo3hBNseJhyxx 279 | Fwt/sp9ykcXppHASB8YcPD25nCt2CPPC9kc2Rscekc+NwjTeOi+q/bAEIqMQIQAP 280 | lhGTP0qEWtIih29OsSODFDphS7kCDQRT643+ARAA6pxglq5FChzdOl7GJDrDyyPJ 281 | S9/92OZGSCoEizk45siG7G9N6Zf+CyQalVBUSq4pazkDCvF0E+ZT7G66ebFeq6Yk 282 | D+qNnSC8mHfjf3n2yJc7FaXz5PjpqAdgAWP6MkMCrjTg1esXlim7S6Q3XILfFG7w 283 | Q2O402vadXI8PigxRlrixN6eUxA1mkjnevkXa0V05s5iL1C18vc4Z/DTloF1YVLk 284 | +fk86PJtS6Yo5bJk2bdUHcRwvR/OmW0uu8BuSpntg8HSvuoBHp91gsPZkcTgeY8H 285 | ELZR3p5jBgy+8HBhFSUCE6Ich1vkdxXz4yWgAD4via38gs6yP1haNtENhxOyNqS5 286 | 3Em8fpV13iLPsYpo6Me2o8TecaU16yi7dp7B21fNHVcS8+n8FVMmj4LknI6Ox3pa 287 | v9HVqmnuN1QW3YD8yeUGLC4SB3MDCrkMtb+txl5eo4v+gnqyCXKDIum7uZiN/YlT 288 | 6CG++5XisZg+uEm03L8ljPUblBJNCgFV6/fV8pKQram06NLi5R+O00T0C0rntDMw 289 | 1t3QtH6SBueHFybmUF1NEpcnmwz7cSHnf1cTF5ku0upeeSVccsHQzqgqz9tPRmUl 290 | ZcbTZbdUaqwATsoE8bFF+MLCP0pSt+xUntsds05F5Y2ccyw7eqn4zoyKYNo57TMw 291 | TTbRCEXZFX8cTeQEZwUAEQEAAYkCHwQYAQIACQUCU+uN/gIbDAAKCRD8s8vZ05JM 292 | zUyAD/9YzYP6Ft+m21EWLTrpWc6b1DJtVxDxmaqUcB+WYaFSJrpsexsw4a0Tgydb 293 | lVEtL4dvWX3Bh7gZdb2bv0MAUhabu99MiCbBVlPG2Ikpeygr8QA9p5CB5bcwHlnC 294 | ZSvK/MxxpskIsBlhwI1GA5u+lQP9WDY5ppVo1g+HfCinjGk5UKEYi7dUKWYN8wgL 295 | WclbnDYCkiijs8dMm2e94eJk7p0hBJ9DYj3ypOnv8zY0S7DaLgi1DaRxQ2rdIF4c 296 | f1z4qlw1bYCvCOMv4KhISp7Y/Hz3SOYIGNMfdGMBeJRNoiFzZL4YNxDXg4fAtvtI 297 | uZ9164M3B1I6knZD/kyOVRTQHleloEkCBSrarZhOcmYLqZI45ImW6R6bwX8ZmBQ2 298 | tX/GGMZP265QI2dB6OgT0w383c9NpshgWFiRx6HyF2y54daPxpXcH1dH5y1Lgb9o 299 | QZBNl+PcsjL2GK3Hs85G1X7GOx2M15kFGNsrQpM4Zbj3YffRyQg+6qwGpU482j+k 300 | grtkKrQr9vI3Q1lcH2yvar8ddQyTqh//WVlOFuzYucJpfY9MHt2sZ2UWJZGHWlEk 301 | JkLV9qaPixi1r7RhnRY/8RgtvOrfTtS4IJDXbtMvL6dmrumZ0bBk2VVuSx9LeoJY 302 | JWVpd/B/k+mxeaSDGHebvmM10A31YBkAmTrHOzh3Ob5ur1xkH7kCDQRT65C9ARAA 303 | rQYJI7piRFVXrEAsJowZyzwMw3xSDszwUqXeKEUBaBWym0ep/Zzc5GmbzQ4C3zkO 304 | YtrD6TRwRSqUsk15eo/kl2DFI98EoXtAoLarJHFZ5A3za3HhgO7VkQCmQxYr8mZC 305 | x6bWoUcsddG9NALsl2CecpfJw8YEDisF+9ZQPFng26zGDffm9NbiuyoeXA3WtNtS 306 | 0328VSvU5/bWF0ktsCiuYkjo5gvaW0wcDwgx6cnyDAB4CI6vSR1Pz0VEPwqvLpDM 307 | 6B3y/mQVG0CmlQJsoFGSRHHJBXYIdZP0APpAJbvfg733OiMm7ySQfNhAbjARw1Nk 308 | A0rVnHUW08qyaVsquVefQHBwktd935IWBIOmgXPFHXYlxuPfbf/5X8jQLMpi1+8x 309 | 7960Vvnc01FdEB8aHgnMkTLlXLW19fZ6IBbqmHpOp+7Bg9M5M0zL8IV89ypKFmMC 310 | RVCqKWf/YUYM4x9Wh4N3t2IM17v2NgAC6NK2PrSNZh0qhLMywSLL+32hzgPYx1B4 311 | Oq8WNWMhFcuRIk0PqkLuBseEa4Im9vPw+PBg0/HARVWSxEV80ykq+i+4TlYiTm3k 312 | 4J+8sCD2So2ZQ0O3VIxNr9odiKca++Qpsv7HondUjfYDX/xBaSpBgm8MkopPAn2t 313 | dE80zQR/n6n8HS7G+skjbIV5Q03NursIKPo2Yyf/XTcAEQEAAYkEPgQYAQIACQUC 314 | U+uQvQIbAgIpCRD8s8vZ05JMzcFdIAQZAQIABgUCU+uQvQAKCRCGeB1PpLLpteRC 315 | 
D/0VxcUB+JVcyNxewKe7uPVc8OP+3Vd0lD3lMWN9nxRc9qvnWT1dyI/o42lmmOxH 316 | dWaoGubMNn+Fw+AA3gQlpSLkTnqvDKY629hEhcct2v4Ulml9JE5KJ2qsc7qKm9K0 317 | 143NdsPcCktmUKBeLcTA4kOtthzmlA6k7z3Miuk0wKfv5xwakMlFtN8e329Qzoxe 318 | 55I7Ker65p7uYPhMF8xluBmWVB2YTW3czpsBjZkzD03bHUAmHWxzIPuTIytRCN0X 319 | tq3eZN3dR+O+9eTP3hBUvg1xc0tDEyHSizOfD5t9hFWRmblA1f1gjUBZOB1/xeTx 320 | 5OghdUdV39REYGzODelnBLflf+6tF890Dyd+2vRXldbEf9QqXc18Krc6jCuZldFT 321 | qHUw2JTG1lK29wq37H0ZYR3HZZl7iKLioLMiROPRb0UbRwMy4paa79SQB9PL3tjE 322 | DGjIWH/UAYh/AHG+yE35mYRs8pg+G0Jy1u4uaYI8OSPa/24PHkBdHuHtDyJH1ryC 323 | pKazjU5k4mnNiHYeIDB3AjrnqQbKNZF26AizTOApqwjPAMs9m7IOCZ/0i9EQQbP3 324 | W+pb1FNkdrjDMpIqCnWQscrPH2FXpvJvmSW5UHAbEifGaI4BRLJxBn+VOniTrD4k 325 | aBBzaulOdIdDL9KEcM9I2WhAAl3iov69JYWhMzsFzZXR6UQvEACXt5WWpgVOB0Vb 326 | 3DJCkMSsCmUHt8iYG9aGqQCK1YKN15HpCVoiP3+zpCBx2PcXmwRwGB2u4HGSYY41 327 | /YW3OInT1lX5yM7jn2C3TKKYaS53lA77A9t8Z23dg8zCVttIIKnbz9ejc/eb6dwK 328 | LhVqHUtk1oipMrVWALRRi20fSfIqitsfj1IJkmyGim8m4C4fXCjKfMrtykedyojM 329 | BIyYSnPtQIs4WsjoMEBSEc6h3Uw5PcNwFtRsu0USOM5jyFhwO2yF6/ebJ1EeAnjh 330 | bjhY0ZZtavWZDaDDHY5E9wxmZM10jdubxU3N5K2T8OUWd9F7TH1YTUWewseZRaAj 331 | SuoPu83Yboft2cZ0iqO5XEif5LNNmQH4KRwZmwY+5s8/zZPeXuN60Ij6iCDA181A 332 | 6pRoQmnLk1zbrsnlTv0rxaOaelSecPrHOrbQGIHT5tc6ItliZ2EiacONmuCIm8W+ 333 | RL55E++TPh/Eqyn+PnL2WP7azwfDZ0qhDQHBs+r2+G5Qp749H2F29zt8j34T3rkb 334 | rEx//hthc5qG2W49kASK+2sK0gIqeHEkCBudcdH8rpfoIXx7cRfR3Pk+3o5GrZVf 335 | BM83UyGyWEjVQCR3/E/ag0jKwmsnlX6ofGFfS6xSqKK+H/FoLsbI23dS4o6bF4QA 336 | HT2hxY8ondF9eKU5rnzLGRFYmm1+Pw== 337 | =gSQT 338 | 339 | -----END PGP PUBLIC KEY BLOCK----- 340 | pub 4096R/90DE59A3 2018-03-23 341 | uid Zoltan Ivanfi (CODE SIGNING KEY) 342 | sig 3 90DE59A3 2018-03-23 Zoltan Ivanfi (CODE SIGNING KEY) 343 | sub 4096R/5842E3B5 2018-03-23 344 | sig 90DE59A3 2018-03-23 Zoltan Ivanfi (CODE SIGNING KEY) 345 | 346 | -----BEGIN PGP PUBLIC KEY BLOCK----- 347 | Version: GnuPG v1 348 | 349 | mQINBFq1Ew4BEADHh5yEROn9b0g2iVFdNeSNBidHKuErYQReqWWEYfReRL5gu8OX 350 | AePJyIC94inupY38vt6yxj9oQzoSwbSP9jRJODGH2AMxbZhMHqrfrAJLBVYHmv8x 351 | J8BP1lG/A0TVkQTTSkysKllWcz+QJB8sz5EksLOOTp/hFjJrGMntzmM94wJorCo7 352 | 9kGksY195WJEYaFGwf5ZRbYksPj8c6il45b5eFxAZ1H3cNoCZDAMxVDayezY81Do 353 | MBHfdZO6/scZ13KDGO0zHXFHxp44AZIyCbqB09QRz7RPlrrUiHa4oV8gJEav8BqV 354 | 833m0ajfncpeqtyLoQ2bweRPdc7WokhqgwFx/5YIXTE7xrEECxzFv0n2Ekg2na1K 355 | Z/uf7B5rduoNGNvuf/M6ySdzSfHV0Q7/oYXeUaFRqHlVtH4+HMxKt/oOlAxRsnRf 356 | 6NjtxRd93u2WJarUK2tGyo+KcNck+0/W8s987WwhYXnMq8YgP/YhPD0Zw8A4axOa 357 | wrhZ8SePEtLTffk3h5uJDQZdzopONVLvmufvbvUL1vqYQ6bTM6C06FurQfI3aJA9 358 | b3Vlr/JkZI2gmfLmQ4ReJsC1XfZ1IVjibzvyi0njIvlTQhMd5qluBbKlFRcf2S15 359 | Fn1WRX1gNSeZdpEbR62NcAnqgIycuYPVDhfs9fm+Ogd7mRfCrhpOvIMCFQARAQAB 360 | tDVab2x0YW4gSXZhbmZpIChDT0RFIFNJR05JTkcgS0VZKSA8eml2YW5maUBhcGFj 361 | aGUub3JnPokCNwQTAQIAIQIbAwIeAQIXgAUCWrUZrAULCQgHAwUVCgkICwUWAgMB 362 | AAAKCRDzAcr1kN5Zo4PqEACGahN0HtTbt1kJhtYS3nMwQYTI73PjL5QSWqHlTdNx 363 | OfjRU5jMjaNpeNwjdx6hxLp/KnI5DZR+19MwA5trUQ3ZEAYkCqU19dmfaIB9rsVv 364 | JMeLXNLuSv11reOrvLYFs8AcWzwIzhPBNz4q9xZqloVE4aCsRqm25xpJae5a8eDG 365 | mPZdbjIBSD6Na+hai9l2egNQdYbvzD6Qydb4XDq8Se3RMq05f2RLOTYId8qb4inD 366 | es1jQi+apUDSZ+WIL7C5UtS6nlzXDnXQtIfHfJsAJl2IW91b6wnoJPMlHtt+3BJg 367 | 82nI8XGIEeDRQGGhLC/ZfkWc5OXapOhDhYykxuGBurvLzq7dPp+iJcs5F1W4PX7I 368 | xzZD/2x23G/Eg09DmVWYkeKeh3HmwqcDbYN0ApgrUmRuwueAqXvhoEe6kxaZcLrj 369 | otDSmZD0vECOadhOgst0kYHdFCgQL5MoPQqJNHZDPsciq7WiiAU9aF9DtWJy+6Zb 370 | 0b5TyaCoT4RaqdJj7AU5bR44BYwHwVTy55UEsa8jZxyvK4kGPFgXwqPwW+lxteiv 371 | k3edHALBEdVZEFs2+xmiz0ns3F4QZHdj9qBG5GGw3jf9iKBDqaerIviEdJS1/yzm 372 | 
u980v7jcpOwg2ZsyTKh/PFmUO8tDHszj68RbhPzdBPNXpXhtEYSdOfSOdeK/g87c 373 | L7kCDQRatRMOARAAzSPx83m+FbeODkApJreD7A14rlT+gMsMaQTapjD5XDHmuS42 374 | sO4PtV4pGAD4q/KnZzorV2u9tcRxteinALcCoKlP7PoB87tpqUELLkUwgDZjNfNz 375 | /GipyJFSdcT2waBY+/03bVpthceCxIV3b6xTm2owrJgS0Exd0b21X3zELKiV9UC6 376 | Pjtd1qLsKgf6N+RvIbT8De2CrFzyy+iISvnZTFMEDE9rnkXuwY93OLtOHjW9rncp 377 | x2aLYmxuoUh8fKZTcWTXe/uG7/elED08aUwb8JINjSNTYBugs/2OTOpKW3jbti0h 378 | GOGk/AD+sKNndTG66/nYD5ED6NW0/NleHCDNO+vh0vzjSds08daotj21Z/2sWY06 379 | qxYGOkTEQy4i0DyTxylxxvPk+c5pTIHupcLsRjmjl3J45vPANnkj4lkNMTdlkabJ 380 | P2lglwOV+fmW+nxGmW/83AxvNun1dMrHCV5oZXIR5eblyHGMwBpzonl7kOFTIagG 381 | wcJJK/erJxvFOdAYuiXkq51/DxlK5KNBIT/G1U71EzFRCU/jK+rdI+fAMmoiJ794 382 | F2PTQwF5NxEr28lM6qOC1QjF5gxVAQU2N6klP5R2Ir1OrIo6RFrhWO+j1AGnUYjE 383 | zcKLf/DuNzGkO1CTp25Z2mROHSc9vdhSm17EcfCzSPKIrCjkEKeW6Xi7N98AEQEA 384 | AYkCHwQYAQIACQIbDAUCWrUYgwAKCRDzAcr1kN5Zo5jVD/0UUCdJL4rEQ0PfQoMs 385 | Gtxx0xMl4ASQQM4ENVBPIzfhXMe3g9iRZkOrNAuRF2KZ3Hr1ekfM4FtcOX4ZGB7t 386 | TL9ai0QIWJYHj7eWQIpno1sHIQQhx0VpA2Av4gxVdfR7aL3O+rm7QLZU2TPXWd3o 387 | wiBn3BnWKgv0j6XmvWH1Yn13OpFuWjt+QEcE2W0wNg8MP7J+fz3XjC84BucMnBQv 388 | hgz7WkFATnWfwwDm+UB3pmibTqC/Kvia/GZzWrwGc/v73XckxnALMfUXV35KHAY4 389 | YXaLDrHu3h5SnXdoKFnyBkHwFZFlFYWSt47SYpeYvaWDUF1aplMXgH/xYoySeGMt 390 | 2GL0xZKE9SI2xwNblqR2dmTOfTjO9HnkI6fYW4VuulBrp850DAWDaluKGoggQaq0 391 | t7qTBxOB4xA9tci9x347Oeq1QnBJZJnkOnEqY56GVG/0ACyemVaPNEg+0B/sD4Uq 392 | 3JyQhtn/+UAlyL8Qg98ExOXqVMGK2+wo9P3aZJbR/TCjmNEsPJPWIITVxVHrr5is 393 | 3Y4InJ6F8pt4etNyRtreOA7OpJfL4z2fYgtxPeOeSkKtI8/hU/x7pbJP40PKiNog 394 | EHa3g1YBk2sRqia3cCVZDEYjLymiJAUnyCWMktGWajs+931V44QSGGM+vWi/DauA 395 | VHP5p3w+PsIm1Xf2o1gQl2N2rA== 396 | =a8/z 397 | -----END PGP PUBLIC KEY BLOCK----- 398 | 399 | --------------------------------------------------------------------------------
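For reference, a minimal verification sketch built from the commands the KEYS usage header already names (the release artifact names below are hypothetical placeholders, not files shipped in this repository):

    gpg --import KEYS
    gpg --verify apache-parquet-format-X.Y.Z.tar.gz.asc apache-parquet-format-X.Y.Z.tar.gz

A "Good signature" from one of the keys above confirms the artifact was signed by that release manager; gpg may still warn that the key is not certified by a trusted signature unless you have established the web of trust locally.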