├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── bin ├── clean ├── run-python-tests └── setup-spark ├── project ├── build.properties └── plugins.sbt ├── python ├── run_tests.py ├── src │ ├── __init__.py │ └── lightcopy │ │ ├── __init__.py │ │ └── index.py └── test │ ├── __init__.py │ └── test_index.py ├── scalastyle-config.xml ├── src ├── main │ └── scala │ │ ├── com │ │ └── github │ │ │ └── lightcopy │ │ │ ├── package.scala │ │ │ └── util │ │ │ ├── IOUtils.scala │ │ │ ├── SerializableConfiguration.scala │ │ │ └── SerializableFileStatus.scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ ├── DataFrameIndexManager.scala │ │ ├── execution │ │ └── datasources │ │ │ ├── CatalogTableSource.scala │ │ │ ├── IndexLocationSpec.scala │ │ │ ├── IndexSourceStrategy.scala │ │ │ ├── IndexedDataSource.scala │ │ │ ├── Metastore.scala │ │ │ ├── MetastoreIndex.scala │ │ │ ├── MetastoreSupport.scala │ │ │ └── parquet │ │ │ ├── ParquetIndex.scala │ │ │ ├── ParquetIndexFilters.scala │ │ │ ├── ParquetIndexReadSupport.scala │ │ │ ├── ParquetMetastoreSupport.scala │ │ │ ├── ParquetSchemaUtils.scala │ │ │ ├── ParquetStatisticsRDD.scala │ │ │ └── metadata.scala │ │ ├── internal │ │ └── IndexConf.scala │ │ └── sources │ │ ├── ColumnFilterStatistics.scala │ │ ├── ColumnStatistics.scala │ │ └── filters.scala └── test │ ├── java │ └── com │ │ └── github │ │ └── lightcopy │ │ └── api │ │ └── JavaApiSuite.java │ ├── resources │ ├── log4j.properties │ └── path-resolver │ └── scala │ ├── com │ └── github │ │ └── lightcopy │ │ ├── QueryContextSuite.scala │ │ ├── testutil │ │ ├── ParquetLogging.scala │ │ ├── SparkBase.scala │ │ ├── SparkLocal.scala │ │ ├── TestBase.scala │ │ └── package.scala │ │ └── util │ │ └── UtilSuite.scala │ └── org │ └── apache │ └── spark │ └── sql │ ├── DataFrameIndexManagerSuite.scala │ ├── IndexSuite.scala │ ├── execution │ └── datasources │ │ ├── CatalogTableSourceSuite.scala │ │ ├── IndexLocationSpecSuite.scala │ │ ├── IndexedDataSourceSuite.scala │ │ ├── MetastoreIndexSuite.scala │ │ ├── MetastoreSuite.scala │ │ └── parquet │ │ ├── MetadataSuite.scala │ │ ├── ParquetIndexFiltersSuite.scala │ │ ├── ParquetIndexReadSupportSuite.scala │ │ ├── ParquetIndexSuite.scala │ │ ├── ParquetMetastoreSupportSuite.scala │ │ ├── ParquetSchemaUtilsSuite.scala │ │ └── ParquetStatisticsRDDSuite.scala │ ├── internal │ └── IndexConfSuite.scala │ └── sources │ ├── ColumnFilterStatisticsSuite.scala │ ├── ColumnStatisticsSuite.scala │ └── TrivialFilterSuite.scala └── version.sbt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/.gitignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/.travis.yml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/README.md -------------------------------------------------------------------------------- /bin/clean: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/bin/clean -------------------------------------------------------------------------------- /bin/run-python-tests: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/bin/run-python-tests -------------------------------------------------------------------------------- /bin/setup-spark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/bin/setup-spark -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.8 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/project/plugins.sbt -------------------------------------------------------------------------------- /python/run_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/python/run_tests.py -------------------------------------------------------------------------------- /python/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/src/lightcopy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/src/lightcopy/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/python/src/lightcopy/index.py -------------------------------------------------------------------------------- /python/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/python/test/__init__.py -------------------------------------------------------------------------------- /python/test/test_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/python/test/test_index.py -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/scalastyle-config.xml -------------------------------------------------------------------------------- /src/main/scala/com/github/lightcopy/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/com/github/lightcopy/package.scala -------------------------------------------------------------------------------- /src/main/scala/com/github/lightcopy/util/IOUtils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/com/github/lightcopy/util/IOUtils.scala -------------------------------------------------------------------------------- /src/main/scala/com/github/lightcopy/util/SerializableConfiguration.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/com/github/lightcopy/util/SerializableConfiguration.scala -------------------------------------------------------------------------------- /src/main/scala/com/github/lightcopy/util/SerializableFileStatus.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/com/github/lightcopy/util/SerializableFileStatus.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/DataFrameIndexManager.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/DataFrameIndexManager.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/CatalogTableSource.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogTableSource.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/IndexLocationSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/IndexLocationSpec.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/IndexSourceStrategy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/IndexSourceStrategy.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/IndexedDataSource.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/IndexedDataSource.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/Metastore.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/Metastore.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/MetastoreIndex.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/MetastoreIndex.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/MetastoreSupport.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/MetastoreSupport.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndex.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndex.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexFilters.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexFilters.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexReadSupport.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexReadSupport.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetMetastoreSupport.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetMetastoreSupport.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaUtils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaUtils.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetStatisticsRDD.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetStatisticsRDD.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/parquet/metadata.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/metadata.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/internal/IndexConf.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/internal/IndexConf.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/sources/ColumnFilterStatistics.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/sources/ColumnFilterStatistics.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/sources/ColumnStatistics.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/sources/ColumnStatistics.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/sources/filters.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/main/scala/org/apache/spark/sql/sources/filters.scala -------------------------------------------------------------------------------- /src/test/java/com/github/lightcopy/api/JavaApiSuite.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/java/com/github/lightcopy/api/JavaApiSuite.java -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/resources/log4j.properties -------------------------------------------------------------------------------- /src/test/resources/path-resolver: -------------------------------------------------------------------------------- 1 | # PATH-RESOLVER: DO NOT DELETE THIS FILE 2 | -------------------------------------------------------------------------------- /src/test/scala/com/github/lightcopy/QueryContextSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/com/github/lightcopy/QueryContextSuite.scala -------------------------------------------------------------------------------- /src/test/scala/com/github/lightcopy/testutil/ParquetLogging.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/com/github/lightcopy/testutil/ParquetLogging.scala -------------------------------------------------------------------------------- /src/test/scala/com/github/lightcopy/testutil/SparkBase.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/com/github/lightcopy/testutil/SparkBase.scala -------------------------------------------------------------------------------- /src/test/scala/com/github/lightcopy/testutil/SparkLocal.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/com/github/lightcopy/testutil/SparkLocal.scala -------------------------------------------------------------------------------- /src/test/scala/com/github/lightcopy/testutil/TestBase.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/com/github/lightcopy/testutil/TestBase.scala -------------------------------------------------------------------------------- /src/test/scala/com/github/lightcopy/testutil/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/com/github/lightcopy/testutil/package.scala -------------------------------------------------------------------------------- /src/test/scala/com/github/lightcopy/util/UtilSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/com/github/lightcopy/util/UtilSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/DataFrameIndexManagerSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/DataFrameIndexManagerSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/IndexSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/IndexSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/CatalogTableSourceSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/CatalogTableSourceSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/IndexLocationSpecSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/IndexLocationSpecSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/IndexedDataSourceSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/IndexedDataSourceSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/MetastoreIndexSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/MetastoreIndexSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/MetastoreSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/MetastoreSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/parquet/MetadataSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/MetadataSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexFiltersSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexFiltersSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexReadSupportSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexReadSupportSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIndexSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetMetastoreSupportSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetMetastoreSupportSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaUtilsSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaUtilsSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetStatisticsRDDSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetStatisticsRDDSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/internal/IndexConfSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/internal/IndexConfSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/sources/ColumnFilterStatisticsSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/sources/ColumnFilterStatisticsSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/sources/ColumnStatisticsSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/sources/ColumnStatisticsSuite.scala -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/sources/TrivialFilterSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightcopy/parquet-index/HEAD/src/test/scala/org/apache/spark/sql/sources/TrivialFilterSuite.scala -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "0.6.0-SNAPSHOT" 2 | --------------------------------------------------------------------------------