├── dev ├── notes.txt ├── snappy.ts ├── demo.ts └── shred.ts ├── .gitignore ├── .npmignore ├── src ├── tsconfig.json ├── thrift │ ├── BoundaryOrder.ts │ ├── FieldRepetitionType.ts │ ├── PageType.ts │ ├── CompressionCodec.ts │ ├── Type.ts │ ├── Encoding.ts │ ├── ConvertedType.ts │ ├── MapType.ts │ ├── BsonType.ts │ ├── DateType.ts │ ├── EnumType.ts │ ├── JsonType.ts │ ├── ListType.ts │ ├── NullType.ts │ ├── UUIDType.ts │ ├── StringType.ts │ ├── MicroSeconds.ts │ ├── MilliSeconds.ts │ ├── IndexPageHeader.ts │ ├── TypeDefinedOrder.ts │ ├── index.ts │ ├── KeyValue.ts │ ├── OffsetIndex.ts │ ├── DecimalType.ts │ ├── IntType.ts │ ├── TimeType.ts │ ├── TimestampType.ts │ ├── ColumnOrder.ts │ ├── DictionaryPageHeader.ts │ ├── SortingColumn.ts │ ├── PageEncodingStats.ts │ ├── TimeUnit.ts │ ├── PageLocation.ts │ ├── Statistics.ts │ ├── DataPageHeader.ts │ ├── RowGroup.ts │ ├── ColumnChunk.ts │ ├── ColumnIndex.ts │ ├── PageHeader.ts │ ├── DataPageHeaderV2.ts │ ├── SchemaElement.ts │ └── FileMetaData.ts ├── index.ts ├── codec │ ├── index.ts │ ├── declare.ts │ ├── rle.ts │ └── plain.ts ├── modules.d.ts ├── declare.ts ├── snappy │ ├── decompressor.ts │ └── index.ts ├── compression.ts ├── schema.ts ├── util.ts └── shred.ts ├── TODO.md ├── test ├── assert_util.ts ├── demo.ts ├── thrift.ts ├── codec_rle.ts └── dremel.ts ├── jest.js ├── tslint.json ├── .travis.yml ├── LICENSE ├── .vscode ├── tasks.json └── launch.json ├── package.json └── tsconfig.json /dev/notes.txt: -------------------------------------------------------------------------------- 1 | [12] Test string 2 | [-11, 4] 3 | [5] data 4 | [-21, 12] 5 | [-17, 5] 6 | [1] . 
7 | [-1, 98] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | *.parquet 3 | npm-debug.log 4 | .nyc_output 5 | lib 6 | build 7 | dump 8 | data 9 | .idea 10 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | package 3 | package.lock 4 | build 5 | gen 6 | dump 7 | *.parquet 8 | data 9 | !lib 10 | !src 11 | -------------------------------------------------------------------------------- /src/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "../lib" 5 | }, 6 | "include": [ 7 | "./**/*" 8 | ] 9 | } -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | - [ ] Compatibility for nested types 2 | - [ ] Compatibility for repeated types 3 | - [ ] Does page v2 exist in Drill?
4 | - [ ] Travis build 5 | - [ ] Types for arguments -------------------------------------------------------------------------------- /src/thrift/BoundaryOrder.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export enum BoundaryOrder { 8 | UNORDERED = 0, 9 | ASCENDING = 1, 10 | DESCENDING = 2 11 | } 12 | -------------------------------------------------------------------------------- /src/thrift/FieldRepetitionType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export enum FieldRepetitionType { 8 | REQUIRED = 0, 9 | OPTIONAL = 1, 10 | REPEATED = 2 11 | } 12 | -------------------------------------------------------------------------------- /src/thrift/PageType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export enum PageType { 8 | DATA_PAGE = 0, 9 | INDEX_PAGE = 1, 10 | DICTIONARY_PAGE = 2, 11 | DATA_PAGE_V2 = 3 12 | } 13 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import * as ParquetShredder from './shred'; 2 | export * from './declare'; 3 | export { ParquetCursor, ParquetEnvelopeReader, ParquetReader } from './reader'; 4 | export { ParquetSchema } from './schema'; 5 | export { ParquetEnvelopeWriter, ParquetTransformer, 
ParquetWriter, ParquetWriterOptions } from './writer'; 6 | export { ParquetShredder }; 7 | -------------------------------------------------------------------------------- /src/thrift/CompressionCodec.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export enum CompressionCodec { 8 | UNCOMPRESSED = 0, 9 | SNAPPY = 1, 10 | GZIP = 2, 11 | LZO = 3, 12 | BROTLI = 4, 13 | LZ4 = 5, 14 | ZSTD = 6 15 | } 16 | -------------------------------------------------------------------------------- /src/thrift/Type.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export enum Type { 8 | BOOLEAN = 0, 9 | INT32 = 1, 10 | INT64 = 2, 11 | INT96 = 3, 12 | FLOAT = 4, 13 | DOUBLE = 5, 14 | BYTE_ARRAY = 6, 15 | FIXED_LEN_BYTE_ARRAY = 7 16 | } 17 | -------------------------------------------------------------------------------- /test/assert_util.ts: -------------------------------------------------------------------------------- 1 | import chai = require('chai'); 2 | const assert = chai.assert; 3 | 4 | const EPSILON_DEFAULT = 0.01; 5 | 6 | export function assertArrayEqualEpsilon(a: number[], b: number[], e?: number): void { 7 | assert.equal(a.length, b.length); 8 | for (let i = 0; i < a.length; ++i) { 9 | assert(Math.abs(a[i] - b[i]) < (e || EPSILON_DEFAULT)); 10 | } 11 | } 12 | 13 | test('Ok', () => void 0); 14 | -------------------------------------------------------------------------------- /jest.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | process.argv[1] = 
"./node_modules/jest/bin/jest"; 3 | process.argv[process.argv.length - 1] = process.argv[process.argv.length - 1].replace(".ts", ".js"); 4 | // console.log(process.argv); 5 | // console.log("-----------"); 6 | require(process.argv[1]); 7 | // const importLocal = require('import-local'); 8 | // if (!importLocal(__filename)) { 9 | // require('jest/node_modules/jest-cli/bin/jest'); 10 | // } 11 | -------------------------------------------------------------------------------- /src/thrift/Encoding.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export enum Encoding { 8 | PLAIN = 0, 9 | PLAIN_DICTIONARY = 2, 10 | RLE = 3, 11 | BIT_PACKED = 4, 12 | DELTA_BINARY_PACKED = 5, 13 | DELTA_LENGTH_BYTE_ARRAY = 6, 14 | DELTA_BYTE_ARRAY = 7, 15 | RLE_DICTIONARY = 8 16 | } 17 | -------------------------------------------------------------------------------- /src/codec/index.ts: -------------------------------------------------------------------------------- 1 | import { ParquetCodec } from '../declare'; 2 | import { ParquetCodecKit } from './declare'; 3 | import RLE = require('./rle'); 4 | import PLAIN = require('./plain'); 5 | 6 | export * from './declare'; 7 | 8 | export const PARQUET_CODEC: Record<ParquetCodec, ParquetCodecKit> = { 9 | PLAIN: { 10 | encodeValues: PLAIN.encodeValues, 11 | decodeValues: PLAIN.decodeValues 12 | }, 13 | RLE: { 14 | encodeValues: RLE.encodeValues, 15 | decodeValues: RLE.decodeValues 16 | } 17 | }; 18 | -------------------------------------------------------------------------------- /test/demo.ts: -------------------------------------------------------------------------------- 1 | import 'jest'; 2 | 3 | describe('This is a demo test', () => { 4 | beforeAll(async () => { 5 | // TODO: Before all tests 6 | }); 7 | 8 | beforeEach(async () => { 9 | // 
TODO: Before each test 10 | }); 11 | 12 | afterAll(async () => { 13 | // TODO: After all tests 14 | }); 15 | 16 | test('test', async () => { 17 | // LEARN: 18 | // chai 19 | // sinon-chai 20 | // chai-as-promised 21 | 22 | let a = 1; 23 | const b = a++; 24 | 25 | expect(a).not.toBe(b); 26 | }); 27 | }); 28 | -------------------------------------------------------------------------------- /src/codec/declare.ts: -------------------------------------------------------------------------------- 1 | import { PrimitiveType } from '../declare'; 2 | 3 | export interface CursorBuffer { 4 | buffer: Buffer; 5 | offset: number; 6 | size?: number; 7 | } 8 | 9 | export interface ParquetCodecOptions { 10 | bitWidth?: number; 11 | disableEnvelope?: boolean; 12 | typeLength?: number; 13 | } 14 | 15 | export interface ParquetCodecKit { 16 | encodeValues(type: PrimitiveType, values: any[], opts?: ParquetCodecOptions): Buffer; 17 | decodeValues(type: PrimitiveType, cursor: CursorBuffer, count: number, opts: ParquetCodecOptions): any[]; 18 | } 19 | -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "tslint-config-airbnb", 3 | "rules": { 4 | "object-shorthand-properties-first": false, 5 | "no-increment-decrement": false, 6 | "import-name": false, 7 | "trailing-comma": false, 8 | "prefer-template": false, 9 | "no-else-after-return": false, 10 | "ter-computed-property-spacing": false, 11 | "max-line-length": [ 12 | true, 13 | 200 14 | ], 15 | "import-blacklist": [ 16 | true, 17 | ".", 18 | ".." 
19 | ] 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/thrift/ConvertedType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export enum ConvertedType { 8 | UTF8 = 0, 9 | MAP = 1, 10 | MAP_KEY_VALUE = 2, 11 | LIST = 3, 12 | ENUM = 4, 13 | DECIMAL = 5, 14 | DATE = 6, 15 | TIME_MILLIS = 7, 16 | TIME_MICROS = 8, 17 | TIMESTAMP_MILLIS = 9, 18 | TIMESTAMP_MICROS = 10, 19 | UINT_8 = 11, 20 | UINT_16 = 12, 21 | UINT_32 = 13, 22 | UINT_64 = 14, 23 | INT_8 = 15, 24 | INT_16 = 16, 25 | INT_32 = 17, 26 | INT_64 = 18, 27 | JSON = 19, 28 | BSON = 20, 29 | INTERVAL = 21 30 | } 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # https://medium.com/@russleyshaw/typescript-package-deployment-with-travisci-df788ffb8563 2 | language: node_js 3 | node_js: 4 | - 8 5 | deploy: 6 | provider: npm 7 | skip_cleanup: true 8 | email: kbajalc@gmail.com 9 | api_key: 10 | secure: J9Xe3yyw6tRLSF5dvhf0DEXueGRLRLlmdyiXjQCCz3tdBbeST9t8LFChqjZ87vL11cKF3zjSaKDl16eHGMOv5eyAPN0c0iUJ1u7u7XhwCEIqBesMnhP3W9m1anugNVhswqyCkWoNXVnmqj00x0ECw8CT2zFz38al3WP0X+tNecqbcvKfZdqapbwEKEhHgo2wZp2CbT5KhEPqCUgIV6yMwoPHvH9NfKANGZcak3ui8WhZgU/JRnTdzM58qze6c560Evi9NSfZ1+4gQJ/QOv0n8zytXUcVdzGIq2q7W5SaPKWL9+7I8qPWGJyEJ6dN7HwuRNv8mCk5ezst6VQCYlCLfDfzrEkRhMk0a46yMMUo3G3maZwxIhjxezlG2p3hP9hUpwG/HzlBu4C2rECswnjdAMas2XNqtWwdRwUjrHMzK1Ezd7zou5u81+ioowS4e1BCiox7tlcpKeEzsbXLCv2/34Q6YQbGUYULy6A+6wMX9mVzXfXohVicH+7ZgV6MDLpX5PTJXORh1Y2q+MkaL5uaMDAcMLyq4TObK92lzm/lV1ExnIvrab4fMUGwZZtwWwq8khaLQYyhPdx5NPT6PcumFtGO83DUcmti7Ci/G0wyxouX72aVvH5pyiiezSSFPfzRZkZTAFWXigpKIGeWkYVN7SbO4iR5HZaFvL5ntNwgPJw= 11 | on: 12 | tags: true 
13 | -------------------------------------------------------------------------------- /test/thrift.ts: -------------------------------------------------------------------------------- 1 | // @flow 2 | 3 | import chai = require('chai'); 4 | const assert = chai.assert; 5 | import parquet_thrift = require('../src/thrift'); 6 | import parquet_util = require('../src/util'); 7 | 8 | // tslint:disable:ter-prefer-arrow-callback 9 | describe('Thrift', function () { 10 | 11 | it('should correctly en/decode literal zeroes with the CompactProtocol', function () { 12 | const obj = new parquet_thrift.ColumnMetaData({ 13 | type: parquet_thrift.Type.BOOLEAN, 14 | path_in_schema: ['test'], 15 | codec: parquet_thrift.CompressionCodec.UNCOMPRESSED, 16 | encodings: [parquet_thrift.Encoding.PLAIN], 17 | num_values: 0, 18 | total_uncompressed_size: 100, 19 | total_compressed_size: 100, 20 | data_page_offset: 0 21 | }); 22 | 23 | // tslint:disable-next-line:variable-name 24 | const obj_bin = parquet_util.serializeThrift(obj); 25 | assert.equal(obj_bin.length, 25); 26 | }); 27 | 28 | }); 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 ironSource Ltd. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in the 5 | Software without restriction, including without limitation the rights to use, 6 | copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 7 | Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 17 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /src/thrift/MapType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IMapTypeArgs { 9 | } 10 | export class MapType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("MapType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): MapType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new MapType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/BsonType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by 
@creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IBsonTypeArgs { 9 | } 10 | export class BsonType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("BsonType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): BsonType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new BsonType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/DateType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IDateTypeArgs { 9 | } 10 | export class DateType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("DateType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): DateType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | 
default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new DateType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/EnumType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IEnumTypeArgs { 9 | } 10 | export class EnumType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("EnumType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): EnumType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new EnumType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/JsonType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IJsonTypeArgs { 9 | } 10 | export class JsonType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("JsonType"); 15 | 
output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): JsonType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new JsonType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/ListType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IListTypeArgs { 9 | } 10 | export class ListType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("ListType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): ListType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new ListType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/NullType.ts: -------------------------------------------------------------------------------- 1 | 
/* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface INullTypeArgs { 9 | } 10 | export class NullType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("NullType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): NullType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new NullType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/UUIDType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IUUIDTypeArgs { 9 | } 10 | export class UUIDType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("UUIDType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): UUIDType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === 
thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new UUIDType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/StringType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IStringTypeArgs { 9 | } 10 | export class StringType { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("StringType"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): StringType { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new StringType(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/MicroSeconds.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IMicroSecondsArgs { 9 | } 10 | export class MicroSeconds { 11 | constructor() { 12 | } 13 
| public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("MicroSeconds"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): MicroSeconds { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new MicroSeconds(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/MilliSeconds.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IMilliSecondsArgs { 9 | } 10 | export class MilliSeconds { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("MilliSeconds"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): MilliSeconds { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new MilliSeconds(); 37 | } 38 | } 39 | 
-------------------------------------------------------------------------------- /src/modules.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'int53' { 2 | declare function readInt64BE(buffer: Buffer, offset?: number): number; 3 | declare function readInt64LE(buffer: Buffer, offset?: number): number; 4 | declare function readUInt64BE(buffer: Buffer, offset?: number): number; 5 | declare function readUInt64LE(buffer: Buffer, offset?: number): number; 6 | declare function writeInt64BE(value: number, buffer: Buffer, offset?: number): void; 7 | declare function writeInt64LE(value: number, buffer: Buffer, offset?: number): void; 8 | declare function writeUInt64BE(value: number, buffer: Buffer, offset?: number): void; 9 | declare function writeUInt64LE(value: number, buffer: Buffer, offset?: number): void; 10 | } 11 | 12 | // declare module 'snappyjs' { 13 | // declare function compress(uncompressed: Buffer): Buffer; 14 | // declare function compress(uncompressed: ArrayBuffer): ArrayBuffer; 15 | // declare function compress(uncompressed: Uint8Array): Uint8Array; 16 | // declare function uncompress(compressed: Buffer): Buffer; 17 | // declare function uncompress(compressed: ArrayBuffer): ArrayBuffer; 18 | // declare function uncompress(compressed: Uint8Array): Uint8Array; 19 | // } 20 | -------------------------------------------------------------------------------- /src/thrift/IndexPageHeader.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IIndexPageHeaderArgs { 9 | } 10 | export class IndexPageHeader { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | 
output.writeStructBegin("IndexPageHeader"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): IndexPageHeader { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new IndexPageHeader(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "PQT Watch", 6 | "identifier": "watch", 7 | "type": "npm", 8 | "script": "watch", 9 | "isBackground": true, 10 | "presentation": { 11 | "echo": true, 12 | "reveal": "never", 13 | "focus": false, 14 | "panel": "dedicated" 15 | }, 16 | "problemMatcher": [ 17 | "$tsc-watch" 18 | ], 19 | "group": { 20 | "kind": "build", 21 | "isDefault": true 22 | } 23 | }, 24 | { 25 | "label": "PQT Build", 26 | "identifier": "build", 27 | "type": "npm", 28 | "script": "build", 29 | "isBackground": true, 30 | "presentation": { 31 | "echo": true, 32 | "reveal": "never", 33 | "focus": false, 34 | "panel": "dedicated" 35 | }, 36 | "problemMatcher": [ 37 | "$tsc" 38 | ] 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /src/thrift/TypeDefinedOrder.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from 
"thrift"; 8 | export interface ITypeDefinedOrderArgs { 9 | } 10 | export class TypeDefinedOrder { 11 | constructor() { 12 | } 13 | public write(output: thrift.TProtocol): void { 14 | output.writeStructBegin("TypeDefinedOrder"); 15 | output.writeFieldStop(); 16 | output.writeStructEnd(); 17 | return; 18 | } 19 | public static read(input: thrift.TProtocol): TypeDefinedOrder { 20 | input.readStructBegin(); 21 | while (true) { 22 | const ret: thrift.TField = input.readFieldBegin(); 23 | const fieldType: thrift.Thrift.Type = ret.ftype; 24 | const fieldId: number = ret.fid; 25 | if (fieldType === thrift.Thrift.Type.STOP) { 26 | break; 27 | } 28 | switch (fieldId) { 29 | default: { 30 | input.skip(fieldType); 31 | } 32 | } 33 | input.readFieldEnd(); 34 | } 35 | input.readStructEnd(); 36 | return new TypeDefinedOrder(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/thrift/index.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | export * from "./Type"; 8 | export * from "./ConvertedType"; 9 | export * from "./FieldRepetitionType"; 10 | export * from "./Encoding"; 11 | export * from "./CompressionCodec"; 12 | export * from "./PageType"; 13 | export * from "./BoundaryOrder"; 14 | export * from "./Statistics"; 15 | export * from "./StringType"; 16 | export * from "./UUIDType"; 17 | export * from "./MapType"; 18 | export * from "./ListType"; 19 | export * from "./EnumType"; 20 | export * from "./DateType"; 21 | export * from "./NullType"; 22 | export * from "./DecimalType"; 23 | export * from "./MilliSeconds"; 24 | export * from "./MicroSeconds"; 25 | export * from "./TimestampType"; 26 | export * from "./TimeType"; 27 | export * from "./IntType"; 28 | export * from "./JsonType"; 29 | 
export * from "./BsonType"; 30 | export * from "./SchemaElement"; 31 | export * from "./DataPageHeader"; 32 | export * from "./IndexPageHeader"; 33 | export * from "./DictionaryPageHeader"; 34 | export * from "./DataPageHeaderV2"; 35 | export * from "./PageHeader"; 36 | export * from "./KeyValue"; 37 | export * from "./SortingColumn"; 38 | export * from "./PageEncodingStats"; 39 | export * from "./ColumnMetaData"; 40 | export * from "./ColumnChunk"; 41 | export * from "./RowGroup"; 42 | export * from "./TypeDefinedOrder"; 43 | export * from "./PageLocation"; 44 | export * from "./OffsetIndex"; 45 | export * from "./ColumnIndex"; 46 | export * from "./FileMetaData"; 47 | export * from "./TimeUnit"; 48 | export * from "./LogicalType"; 49 | export * from "./ColumnOrder"; 50 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible Node.js debug attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "PQT Program", 11 | "program": "${file}", 12 | "cwd": "${workspaceRoot}", 13 | "sourceMaps": true, 14 | "preLaunchTask": "watch", 15 | "showAsyncStacks": true, 16 | "outFiles": [ 17 | "${workspaceRoot}/build/**/*.js" 18 | ] 19 | }, 20 | { 21 | "type": "node", 22 | "request": "launch", 23 | "name": "PQT Demo", 24 | "program": "${workspaceRoot}/dev/demo.ts", 25 | "cwd": "${workspaceRoot}", 26 | "sourceMaps": true, 27 | "preLaunchTask": "watch", 28 | "showAsyncStacks": true, 29 | "outFiles": [ 30 | "${workspaceRoot}/build/**/*.js" 31 | ] 32 | }, 33 | { 34 | "type": "node", 35 | "request": "launch", 36 | "name": "PQT Jest File", 37 | "program": "${workspaceRoot}/jest", 38 | "args": [ 39 | "-i", 40 | // "${file}" 41 | "${workspaceRoot}/build/${relativeFile}" 42 | ], 43 | "internalConsoleOptions": "openOnSessionStart", 44 | "outputCapture": "std", 45 | // "console": "integratedTerminal", 46 | "preLaunchTask": "watch", 47 | "sourceMaps": true, 48 | "outFiles": [ 49 | "${workspaceRoot}/build/**/*.js" 50 | ] 51 | }, 52 | { 53 | "type": "node", 54 | "request": "attach", 55 | "name": "Attach to Process", 56 | "port": 5858, 57 | "outFiles": [] 58 | } 59 | ] 60 | } -------------------------------------------------------------------------------- /src/declare.ts: -------------------------------------------------------------------------------- 1 | 2 | export type ParquetCodec = 'PLAIN' | 'RLE'; 3 | export type ParquetCompression = 'UNCOMPRESSED' | 'GZIP' | 'SNAPPY' | 'LZO' | 'BROTLI' | 'LZ4'; 4 | export type RepetitionType = 'REQUIRED' | 'OPTIONAL' | 'REPEATED'; 5 | export type ParquetType = PrimitiveType | OriginalType; 6 | 7 | export type PrimitiveType = 8 | // Base Types 9 | 'BOOLEAN' // 0 10 | | 'INT32' // 1 11 | | 'INT64' // 2 12 | | 'INT96' // 3 13 | | 'FLOAT' // 4 14 | | 'DOUBLE' // 
5 15 | | 'BYTE_ARRAY' // 6, 16 | | 'FIXED_LEN_BYTE_ARRAY'; // 7 17 | 18 | export type OriginalType = 19 | // Converted Types 20 | | 'UTF8' // 0 21 | // | 'MAP' // 1 22 | // | 'MAP_KEY_VALUE' // 2 23 | // | 'LIST' // 3 24 | // | 'ENUM' // 4 25 | // | 'DECIMAL' // 5 26 | | 'DATE' // 6 27 | | 'TIME_MILLIS' // 7 28 | | 'TIME_MICROS' // 8 29 | | 'TIMESTAMP_MILLIS' // 9 30 | | 'TIMESTAMP_MICROS' // 10 31 | | 'UINT_8' // 11 32 | | 'UINT_16' // 12 33 | | 'UINT_32' // 13 34 | | 'UINT_64' // 14 35 | | 'INT_8' // 15 36 | | 'INT_16' // 16 37 | | 'INT_32' // 17 38 | | 'INT_64' // 18 39 | | 'JSON' // 19 40 | | 'BSON' // 20 41 | | 'INTERVAL'; // 21 42 | 43 | export interface SchemaDefinition { 44 | [string: string]: FieldDefinition; 45 | } 46 | 47 | export interface FieldDefinition { 48 | type?: ParquetType; 49 | typeLength?: number; 50 | encoding?: ParquetCodec; 51 | compression?: ParquetCompression; 52 | optional?: boolean; 53 | repeated?: boolean; 54 | fields?: SchemaDefinition; 55 | } 56 | 57 | export interface ParquetField { 58 | name: string; 59 | path: string[]; 60 | key: string; 61 | primitiveType?: PrimitiveType; 62 | originalType?: OriginalType; 63 | repetitionType: RepetitionType; 64 | typeLength?: number; 65 | encoding?: ParquetCodec; 66 | compression?: ParquetCompression; 67 | rLevelMax: number; 68 | dLevelMax: number; 69 | isNested?: boolean; 70 | fieldCount?: number; 71 | fields?: Record<string, ParquetField>; // child fields of a nested field, keyed by string - presumably the field name; TODO confirm 72 | } 73 | 74 | export interface ParquetBuffer { 75 | rowCount?: number; 76 | columnData?: Record<string, ParquetData>; // per-column data, keyed by string - presumably ParquetField.key; TODO confirm 77 | } 78 | 79 | export interface ParquetData { 80 | dlevels: number[]; 81 | rlevels: number[]; 82 | values: any[]; 83 | count: number; 84 | } 85 | 86 | export interface ParquetRecord { 87 | [key: string]: any; 88 | } 89 | -------------------------------------------------------------------------------- /src/thrift/KeyValue.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated
by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IKeyValueArgs { 9 | key: string; 10 | value?: string; 11 | } 12 | export class KeyValue { 13 | public key: string; 14 | public value?: string; 15 | constructor(args: IKeyValueArgs) { 16 | if (args != null && args.key != null) { 17 | this.key = args.key; 18 | } 19 | else { 20 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[key] is unset!"); 21 | } 22 | if (args != null && args.value != null) { 23 | this.value = args.value; 24 | } 25 | } 26 | public write(output: thrift.TProtocol): void { 27 | output.writeStructBegin("KeyValue"); 28 | if (this.key != null) { 29 | output.writeFieldBegin("key", thrift.Thrift.Type.STRING, 1); 30 | output.writeString(this.key); 31 | output.writeFieldEnd(); 32 | } 33 | if (this.value != null) { 34 | output.writeFieldBegin("value", thrift.Thrift.Type.STRING, 2); 35 | output.writeString(this.value); 36 | output.writeFieldEnd(); 37 | } 38 | output.writeFieldStop(); 39 | output.writeStructEnd(); 40 | return; 41 | } 42 | public static read(input: thrift.TProtocol): KeyValue { 43 | input.readStructBegin(); 44 | let _args: any = {}; 45 | while (true) { 46 | const ret: thrift.TField = input.readFieldBegin(); 47 | const fieldType: thrift.Thrift.Type = ret.ftype; 48 | const fieldId: number = ret.fid; 49 | if (fieldType === thrift.Thrift.Type.STOP) { 50 | break; 51 | } 52 | switch (fieldId) { 53 | case 1: 54 | if (fieldType === thrift.Thrift.Type.STRING) { 55 | const value_1: string = input.readString(); 56 | _args.key = value_1; 57 | } 58 | else { 59 | input.skip(fieldType); 60 | } 61 | break; 62 | case 2: 63 | if (fieldType === thrift.Thrift.Type.STRING) { 64 | const value_2: string = input.readString(); 65 | _args.value = value_2; 66 | } 67 | else { 68 | input.skip(fieldType); 69 | } 70 | break; 71 | default: { 72 
| input.skip(fieldType); 73 | } 74 | } 75 | input.readFieldEnd(); 76 | } 77 | input.readStructEnd(); 78 | if (_args.key !== undefined) { 79 | return new KeyValue(_args); 80 | } 81 | else { 82 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read KeyValue from input"); 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/thrift/OffsetIndex.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | import * as PageLocation from "./PageLocation"; 9 | export interface IOffsetIndexArgs { 10 | page_locations: Array<PageLocation.PageLocation>; // required: the OffsetIndex constructor throws when this is unset 11 | } 12 | export class OffsetIndex { 13 | public page_locations: Array<PageLocation.PageLocation>; 14 | constructor(args: IOffsetIndexArgs) { 15 | if (args != null && args.page_locations != null) { 16 | this.page_locations = args.page_locations; 17 | } 18 | else { 19 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[page_locations] is unset!"); 20 | } 21 | } 22 | public write(output: thrift.TProtocol): void { 23 | output.writeStructBegin("OffsetIndex"); 24 | if (this.page_locations != null) { 25 | output.writeFieldBegin("page_locations", thrift.Thrift.Type.LIST, 1); 26 | output.writeListBegin(thrift.Thrift.Type.STRUCT, this.page_locations.length); 27 | this.page_locations.forEach((value_1: PageLocation.PageLocation): void => { 28 | value_1.write(output); 29 | }); 30 | output.writeListEnd(); 31 | output.writeFieldEnd(); 32 | } 33 | output.writeFieldStop(); 34 | output.writeStructEnd(); 35 | return; 36 | } 37 | public static read(input: thrift.TProtocol): OffsetIndex { 38 | input.readStructBegin(); 39 | let _args: any = {}; 40 | while (true) { 41 | const ret: 
thrift.TField = input.readFieldBegin(); 42 | const fieldType: thrift.Thrift.Type = ret.ftype; 43 | const fieldId: number = ret.fid; 44 | if (fieldType === thrift.Thrift.Type.STOP) { 45 | break; 46 | } 47 | switch (fieldId) { 48 | case 1: 49 | if (fieldType === thrift.Thrift.Type.LIST) { 50 | const value_2: Array<PageLocation.PageLocation> = new Array<PageLocation.PageLocation>(); 51 | const metadata_1: thrift.TList = input.readListBegin(); 52 | const size_1: number = metadata_1.size; 53 | for (let i_1: number = 0; i_1 < size_1; i_1++) { 54 | const value_3: PageLocation.PageLocation = PageLocation.PageLocation.read(input); 55 | value_2.push(value_3); 56 | } 57 | input.readListEnd(); 58 | _args.page_locations = value_2; 59 | } 60 | else { 61 | input.skip(fieldType); 62 | } 63 | break; 64 | default: { 65 | input.skip(fieldType); 66 | } 67 | } 68 | input.readFieldEnd(); 69 | } 70 | input.readStructEnd(); 71 | if (_args.page_locations !== undefined) { 72 | return new OffsetIndex(_args); 73 | } 74 | else { 75 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read OffsetIndex from input"); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/thrift/DecimalType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IDecimalTypeArgs { 9 | scale: number; 10 | precision: number; 11 | } 12 | export class DecimalType { 13 | public scale: number; 14 | public precision: number; 15 | constructor(args: IDecimalTypeArgs) { 16 | if (args != null && args.scale != null) { 17 | this.scale = args.scale; 18 | } 19 | else { 20 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[scale] is unset!"); 21
| } 22 | if (args != null && args.precision != null) { 23 | this.precision = args.precision; 24 | } 25 | else { 26 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[precision] is unset!"); 27 | } 28 | } 29 | public write(output: thrift.TProtocol): void { 30 | output.writeStructBegin("DecimalType"); 31 | if (this.scale != null) { 32 | output.writeFieldBegin("scale", thrift.Thrift.Type.I32, 1); 33 | output.writeI32(this.scale); 34 | output.writeFieldEnd(); 35 | } 36 | if (this.precision != null) { 37 | output.writeFieldBegin("precision", thrift.Thrift.Type.I32, 2); 38 | output.writeI32(this.precision); 39 | output.writeFieldEnd(); 40 | } 41 | output.writeFieldStop(); 42 | output.writeStructEnd(); 43 | return; 44 | } 45 | public static read(input: thrift.TProtocol): DecimalType { 46 | input.readStructBegin(); 47 | let _args: any = {}; 48 | while (true) { 49 | const ret: thrift.TField = input.readFieldBegin(); 50 | const fieldType: thrift.Thrift.Type = ret.ftype; 51 | const fieldId: number = ret.fid; 52 | if (fieldType === thrift.Thrift.Type.STOP) { 53 | break; 54 | } 55 | switch (fieldId) { 56 | case 1: 57 | if (fieldType === thrift.Thrift.Type.I32) { 58 | const value_1: number = input.readI32(); 59 | _args.scale = value_1; 60 | } 61 | else { 62 | input.skip(fieldType); 63 | } 64 | break; 65 | case 2: 66 | if (fieldType === thrift.Thrift.Type.I32) { 67 | const value_2: number = input.readI32(); 68 | _args.precision = value_2; 69 | } 70 | else { 71 | input.skip(fieldType); 72 | } 73 | break; 74 | default: { 75 | input.skip(fieldType); 76 | } 77 | } 78 | input.readFieldEnd(); 79 | } 80 | input.readStructEnd(); 81 | if (_args.scale !== undefined && _args.precision !== undefined) { 82 | return new DecimalType(_args); 83 | } 84 | else { 85 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read DecimalType from input"); 86 | } 87 | } 88 | } 89 | 
-------------------------------------------------------------------------------- /src/thrift/IntType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface IIntTypeArgs { 9 | bitWidth: number; 10 | isSigned: boolean; 11 | } 12 | export class IntType { 13 | public bitWidth: number; 14 | public isSigned: boolean; 15 | constructor(args: IIntTypeArgs) { 16 | if (args != null && args.bitWidth != null) { 17 | this.bitWidth = args.bitWidth; 18 | } 19 | else { 20 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[bitWidth] is unset!"); 21 | } 22 | if (args != null && args.isSigned != null) { 23 | this.isSigned = args.isSigned; 24 | } 25 | else { 26 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[isSigned] is unset!"); 27 | } 28 | } 29 | public write(output: thrift.TProtocol): void { 30 | output.writeStructBegin("IntType"); 31 | if (this.bitWidth != null) { 32 | output.writeFieldBegin("bitWidth", thrift.Thrift.Type.BYTE, 1); 33 | output.writeByte(this.bitWidth); 34 | output.writeFieldEnd(); 35 | } 36 | if (this.isSigned != null) { 37 | output.writeFieldBegin("isSigned", thrift.Thrift.Type.BOOL, 2); 38 | output.writeBool(this.isSigned); 39 | output.writeFieldEnd(); 40 | } 41 | output.writeFieldStop(); 42 | output.writeStructEnd(); 43 | return; 44 | } 45 | public static read(input: thrift.TProtocol): IntType { 46 | input.readStructBegin(); 47 | let _args: any = {}; 48 | while (true) { 49 | const ret: thrift.TField = input.readFieldBegin(); 50 | const fieldType: thrift.Thrift.Type = ret.ftype; 51 | const fieldId: number = ret.fid; 52 | if (fieldType === thrift.Thrift.Type.STOP) { 53 | 
break; 54 | } 55 | switch (fieldId) { 56 | case 1: 57 | if (fieldType === thrift.Thrift.Type.BYTE) { 58 | const value_1: number = input.readByte(); 59 | _args.bitWidth = value_1; 60 | } 61 | else { 62 | input.skip(fieldType); 63 | } 64 | break; 65 | case 2: 66 | if (fieldType === thrift.Thrift.Type.BOOL) { 67 | const value_2: boolean = input.readBool(); 68 | _args.isSigned = value_2; 69 | } 70 | else { 71 | input.skip(fieldType); 72 | } 73 | break; 74 | default: { 75 | input.skip(fieldType); 76 | } 77 | } 78 | input.readFieldEnd(); 79 | } 80 | input.readStructEnd(); 81 | if (_args.bitWidth !== undefined && _args.isSigned !== undefined) { 82 | return new IntType(_args); 83 | } 84 | else { 85 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read IntType from input"); 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/snappy/decompressor.ts: -------------------------------------------------------------------------------- 1 | const WORD_MASK = [0, 0xff, 0xffff, 0xffffff, 0xffffffff]; 2 | 3 | function copyBytes(fromArray: Buffer, fromPos: number, toArray: Buffer, toPos: number, length: number) { 4 | for (let i = 0; i < length; i++) { 5 | toArray[toPos + i] = fromArray[fromPos + i]; 6 | } 7 | } 8 | 9 | function selfCopyBytes(array: Buffer, pos: number, offset: number, length: number) { 10 | for (let i = 0; i < length; i++) { 11 | array[pos + i] = array[pos - offset + i]; 12 | } 13 | } 14 | 15 | export function readUncompressedLength(input: Buffer, varLen?: boolean) { 16 | let pos = 0; 17 | let result = 0; 18 | let shift = 0; 19 | while (shift < 32 && pos < input.length) { 20 | const c = input[pos]; 21 | pos += 1; 22 | const val = c & 0x7f; 23 | if (((val << shift) >>> shift) !== val) { 24 | return -1; 25 | } 26 | result |= val << shift; 27 | if (c < 128) { 28 | return varLen ? 
pos : result; 29 | } 30 | shift += 7; 31 | } 32 | return -1; 33 | } 34 | 35 | /** 36 | * Decompresses a raw Snappy block from `input` into the caller-allocated `output`. 37 | * 38 | * The varint length preamble is skipped using readUncompressedLength(input, true); 39 | * the bytes after it are decoded as a sequence of literal and copy elements. 40 | * 41 | * @param input Snappy block, starting with its varint length preamble 42 | * @param output destination buffer; must be large enough for the whole payload 43 | * @returns true when every element decoded cleanly; false when the stream is 44 | * malformed or truncated, or when an element would write past the end of `output` 45 | */ 46 | export function uncompressToBuffer(input: Buffer, output: Buffer) { 47 | let pos = readUncompressedLength(input, true); 48 | if (pos === -1) { 49 | // Invalid or truncated length preamble: there is no safe place to start decoding. 50 | return false; 51 | } 52 | 53 | const arrayLength = input.length; 54 | const outLength = output.length; 55 | let outPos = 0; 56 | let c: number; 57 | let len = 0; 58 | let smallLen: number; 59 | let offset = 0; 60 | while (pos < arrayLength) { 61 | c = input[pos]; 62 | pos += 1; 63 | if ((c & 0x3) === 0) { 64 | // Literal 65 | len = (c >>> 2) + 1; 66 | if (len > 60) { 67 | if (pos + 3 >= arrayLength) { 68 | return false; 69 | } 70 | smallLen = len - 60; 71 | len = input[pos] + (input[pos + 1] << 8) + (input[pos + 2] << 16) + (input[pos + 3] << 24); 72 | // `>>> 0` keeps a 4-byte length unsigned; a negative length would move `pos` backwards. 73 | len = ((len & WORD_MASK[smallLen]) >>> 0) + 1; 74 | pos += smallLen; 75 | } 76 | if (pos + len > arrayLength || outPos + len > outLength) { 77 | return false; 78 | } 79 | copyBytes(input, pos, output, outPos, len); 80 | pos += len; 81 | outPos += len; 82 | } else { 83 | // Copy: `len` bytes are repeated from `offset` bytes back in the output. 84 | switch (c & 0x3) { 85 | case 1: 86 | // The offset's low byte follows the tag; a truncated stream must not read past the end. 87 | if (pos >= arrayLength) { 88 | return false; 89 | } 90 | len = ((c >>> 2) & 0x7) + 4; 91 | offset = input[pos] + ((c >>> 5) << 8); 92 | pos += 1; 93 | break; 94 | case 2: 95 | if (pos + 1 >= arrayLength) { 96 | return false; 97 | } 98 | len = (c >>> 2) + 1; 99 | offset = input[pos] + (input[pos + 1] << 8); 100 | pos += 2; 101 | break; 102 | case 3: 103 | if (pos + 3 >= arrayLength) { 104 | return false; 105 | } 106 | len = (c >>> 2) + 1; 107 | // `>>> 0` keeps the 32-bit offset unsigned so the range check below rejects it when too large. 108 | offset = (input[pos] + (input[pos + 1] << 8) + (input[pos + 2] << 16) + (input[pos + 3] << 24)) >>> 0; 109 | pos += 4; 110 | break; 111 | default: 112 | break; 113 | } 114 | if (offset === 0 || offset > outPos || outPos + len > outLength) { 115 | return false; 116 | } 117 | selfCopyBytes(output, outPos, offset, len); 118 | outPos += len; 119 | } 120 | } 121 | return true; 122 | } 123 | -------------------------------------------------------------------------------- /dev/snappy.ts: -------------------------------------------------------------------------------- 1 | import snappy = require('../src/snappy'); 2 | const snappyjs = require('snappy'); 3 | import assert = 
require('assert'); 4 | import fs = require('fs'); 5 | 6 | let qq = 7 | 'Test string est data Test string data ' 8 | + '.XXXXX.................................................................... ' 9 | + 'XXXXX....................................................................'; 10 | 11 | qq = fs.readFileSync('./data/alice29.txt').toString(); 12 | 13 | const buf = Buffer.from(qq); 14 | const zz = snappy.compress(buf); 15 | const yy = snappyjs.compressSync(buf); 16 | 17 | const big = Buffer.concat([buf, buf, buf, buf, buf]); 18 | 19 | let now = Date.now(); 20 | for (let i = 0; i < 1000; i++) { 21 | snappy.compress(big); 22 | } 23 | console.log(Date.now() - now); 24 | 25 | now = Date.now(); 26 | for (let i = 0; i < 1000; i++) { 27 | snappyjs.compressSync(big); 28 | } 29 | console.log(Date.now() - now); 30 | 31 | const vv = snappyjs.uncompressSync(zz); 32 | assert.deepStrictEqual(vv, Buffer.from(qq)); 33 | assert.deepStrictEqual(zz, yy); 34 | 35 | // export function compressFragment2(input: Buffer, baseIp: number, inputSize: number, output: Buffer, baseOp: number) { 36 | // if (inputSize < INPUT_MARGIN) { 37 | // return emitLiteral(input, baseIp, inputSize, output, baseOp); 38 | // } 39 | 40 | // const hashTableBits = hashBits(inputSize); 41 | // const shift = 32 - hashTableBits; 42 | // if (typeof globalHashTables[hashTableBits] === 'undefined') { 43 | // globalHashTables[hashTableBits] = new Uint16Array(1 << hashTableBits); 44 | // } 45 | // const table = globalHashTables[hashTableBits]; 46 | // for (let i = 0; i < table.length; i++) { 47 | // table[i] = 0; 48 | // } 49 | 50 | // const ipEnd = baseIp + inputSize; 51 | // const ipLimit = ipEnd - INPUT_MARGIN; 52 | 53 | // let ip = baseIp + 1; 54 | // let op = baseOp; 55 | 56 | // let hash = hash32(input, baseIp, shift); 57 | // table[hash] = 1; 58 | 59 | // let nextEmit = baseIp; 60 | // loop: while (ip < ipLimit) { 61 | // let nextIp = ip; 62 | // let candidate = 0; 63 | // let matched = 0; 64 | // do { 65 | // ip = 
nextIp; 66 | // hash = hash32(input, ip, shift); 67 | // const pos = table[hash] - 1; 68 | // if (pos < 0) table[hash] = ip - baseIp + 1; 69 | // nextIp = ip + 1; 70 | // if (nextIp > ipLimit) break loop; 71 | // for (let c = pos; pos >= 0 && c < ip - baseIp - 4; c++) { 72 | // let x = 0; 73 | // while (ip + x < ipEnd && input[baseIp + c + x] === input[ip + x]) x++; 74 | // if (x > matched) { 75 | // matched = x; 76 | // candidate = c; 77 | // } 78 | // } 79 | // } while (matched < 4); 80 | // if (nextEmit < ip) { 81 | // op = emitLiteral(input, nextEmit, ip - nextEmit, output, op); 82 | // } 83 | // op = emitCopy(output, op, ip - candidate, matched); 84 | // ip += matched; 85 | // nextEmit = ip; 86 | // } 87 | 88 | // if (nextEmit < ipEnd) { 89 | // op = emitLiteral(input, nextEmit, ipEnd - nextEmit, output, op); 90 | // } 91 | // return op; 92 | // } 93 | -------------------------------------------------------------------------------- /src/thrift/TimeType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | import * as TimeUnit from "./TimeUnit"; 9 | export interface ITimeTypeArgs { 10 | isAdjustedToUTC: boolean; 11 | unit: TimeUnit.TimeUnit; 12 | } 13 | export class TimeType { 14 | public isAdjustedToUTC: boolean; 15 | public unit: TimeUnit.TimeUnit; 16 | constructor(args: ITimeTypeArgs) { 17 | if (args != null && args.isAdjustedToUTC != null) { 18 | this.isAdjustedToUTC = args.isAdjustedToUTC; 19 | } 20 | else { 21 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[isAdjustedToUTC] is unset!"); 22 | } 23 | if (args != null && args.unit != null) { 24 | this.unit = args.unit; 25 | } 26 | else { 27 | throw new 
thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[unit] is unset!"); 28 | } 29 | } 30 | public write(output: thrift.TProtocol): void { 31 | output.writeStructBegin("TimeType"); 32 | if (this.isAdjustedToUTC != null) { 33 | output.writeFieldBegin("isAdjustedToUTC", thrift.Thrift.Type.BOOL, 1); 34 | output.writeBool(this.isAdjustedToUTC); 35 | output.writeFieldEnd(); 36 | } 37 | if (this.unit != null) { 38 | output.writeFieldBegin("unit", thrift.Thrift.Type.STRUCT, 2); 39 | this.unit.write(output); 40 | output.writeFieldEnd(); 41 | } 42 | output.writeFieldStop(); 43 | output.writeStructEnd(); 44 | return; 45 | } 46 | public static read(input: thrift.TProtocol): TimeType { 47 | input.readStructBegin(); 48 | let _args: any = {}; 49 | while (true) { 50 | const ret: thrift.TField = input.readFieldBegin(); 51 | const fieldType: thrift.Thrift.Type = ret.ftype; 52 | const fieldId: number = ret.fid; 53 | if (fieldType === thrift.Thrift.Type.STOP) { 54 | break; 55 | } 56 | switch (fieldId) { 57 | case 1: 58 | if (fieldType === thrift.Thrift.Type.BOOL) { 59 | const value_1: boolean = input.readBool(); 60 | _args.isAdjustedToUTC = value_1; 61 | } 62 | else { 63 | input.skip(fieldType); 64 | } 65 | break; 66 | case 2: 67 | if (fieldType === thrift.Thrift.Type.STRUCT) { 68 | const value_2: TimeUnit.TimeUnit = TimeUnit.TimeUnit.read(input); 69 | _args.unit = value_2; 70 | } 71 | else { 72 | input.skip(fieldType); 73 | } 74 | break; 75 | default: { 76 | input.skip(fieldType); 77 | } 78 | } 79 | input.readFieldEnd(); 80 | } 81 | input.readStructEnd(); 82 | if (_args.isAdjustedToUTC !== undefined && _args.unit !== undefined) { 83 | return new TimeType(_args); 84 | } 85 | else { 86 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read TimeType from input"); 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- 
/src/thrift/TimestampType.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | import * as TimeUnit from "./TimeUnit"; 9 | export interface ITimestampTypeArgs { 10 | isAdjustedToUTC: boolean; 11 | unit: TimeUnit.TimeUnit; 12 | } 13 | export class TimestampType { 14 | public isAdjustedToUTC: boolean; 15 | public unit: TimeUnit.TimeUnit; 16 | constructor(args: ITimestampTypeArgs) { 17 | if (args != null && args.isAdjustedToUTC != null) { 18 | this.isAdjustedToUTC = args.isAdjustedToUTC; 19 | } 20 | else { 21 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[isAdjustedToUTC] is unset!"); 22 | } 23 | if (args != null && args.unit != null) { 24 | this.unit = args.unit; 25 | } 26 | else { 27 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[unit] is unset!"); 28 | } 29 | } 30 | public write(output: thrift.TProtocol): void { 31 | output.writeStructBegin("TimestampType"); 32 | if (this.isAdjustedToUTC != null) { 33 | output.writeFieldBegin("isAdjustedToUTC", thrift.Thrift.Type.BOOL, 1); 34 | output.writeBool(this.isAdjustedToUTC); 35 | output.writeFieldEnd(); 36 | } 37 | if (this.unit != null) { 38 | output.writeFieldBegin("unit", thrift.Thrift.Type.STRUCT, 2); 39 | this.unit.write(output); 40 | output.writeFieldEnd(); 41 | } 42 | output.writeFieldStop(); 43 | output.writeStructEnd(); 44 | return; 45 | } 46 | public static read(input: thrift.TProtocol): TimestampType { 47 | input.readStructBegin(); 48 | let _args: any = {}; 49 | while (true) { 50 | const ret: thrift.TField = input.readFieldBegin(); 51 | const fieldType: thrift.Thrift.Type = ret.ftype; 52 | const fieldId: number = ret.fid; 53 | if 
(fieldType === thrift.Thrift.Type.STOP) { 54 | break; 55 | } 56 | switch (fieldId) { 57 | case 1: 58 | if (fieldType === thrift.Thrift.Type.BOOL) { 59 | const value_1: boolean = input.readBool(); 60 | _args.isAdjustedToUTC = value_1; 61 | } 62 | else { 63 | input.skip(fieldType); 64 | } 65 | break; 66 | case 2: 67 | if (fieldType === thrift.Thrift.Type.STRUCT) { 68 | const value_2: TimeUnit.TimeUnit = TimeUnit.TimeUnit.read(input); 69 | _args.unit = value_2; 70 | } 71 | else { 72 | input.skip(fieldType); 73 | } 74 | break; 75 | default: { 76 | input.skip(fieldType); 77 | } 78 | } 79 | input.readFieldEnd(); 80 | } 81 | input.readStructEnd(); 82 | if (_args.isAdjustedToUTC !== undefined && _args.unit !== undefined) { 83 | return new TimestampType(_args); 84 | } 85 | else { 86 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read TimestampType from input"); 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/thrift/ColumnOrder.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | import * as TypeDefinedOrder from "./TypeDefinedOrder"; 9 | export interface IColumnOrderArgs { 10 | TYPE_ORDER?: TypeDefinedOrder.TypeDefinedOrder; 11 | } 12 | export class ColumnOrder { 13 | public TYPE_ORDER?: TypeDefinedOrder.TypeDefinedOrder; 14 | constructor(args?: IColumnOrderArgs) { 15 | let _fieldsSet: number = 0; 16 | if (args != null) { 17 | if (args.TYPE_ORDER != null) { 18 | _fieldsSet++; 19 | this.TYPE_ORDER = args.TYPE_ORDER; 20 | } 21 | if (_fieldsSet > 1) { 22 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.INVALID_DATA, "Cannot read a TUnion with more than one set 
value!"); 23 | } 24 | else if (_fieldsSet < 1) { 25 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.INVALID_DATA, "Cannot read a TUnion with no set value!"); 26 | } 27 | } 28 | } 29 | public static fromTYPE_ORDER(TYPE_ORDER: TypeDefinedOrder.TypeDefinedOrder): ColumnOrder { 30 | return new ColumnOrder({ TYPE_ORDER }); 31 | } 32 | public write(output: thrift.TProtocol): void { 33 | output.writeStructBegin("ColumnOrder"); 34 | if (this.TYPE_ORDER != null) { 35 | output.writeFieldBegin("TYPE_ORDER", thrift.Thrift.Type.STRUCT, 1); 36 | this.TYPE_ORDER.write(output); 37 | output.writeFieldEnd(); 38 | } 39 | output.writeFieldStop(); 40 | output.writeStructEnd(); 41 | return; 42 | } 43 | public static read(input: thrift.TProtocol): ColumnOrder { 44 | let _fieldsSet: number = 0; 45 | let _returnValue: ColumnOrder | null = null; 46 | input.readStructBegin(); 47 | while (true) { 48 | const ret: thrift.TField = input.readFieldBegin(); 49 | const fieldType: thrift.Thrift.Type = ret.ftype; 50 | const fieldId: number = ret.fid; 51 | if (fieldType === thrift.Thrift.Type.STOP) { 52 | break; 53 | } 54 | switch (fieldId) { 55 | case 1: 56 | if (fieldType === thrift.Thrift.Type.STRUCT) { 57 | _fieldsSet++; 58 | const value_1: TypeDefinedOrder.TypeDefinedOrder = TypeDefinedOrder.TypeDefinedOrder.read(input); 59 | _returnValue = ColumnOrder.fromTYPE_ORDER(value_1); 60 | } 61 | else { 62 | input.skip(fieldType); 63 | } 64 | break; 65 | default: { 66 | input.skip(fieldType); 67 | } 68 | } 69 | input.readFieldEnd(); 70 | } 71 | input.readStructEnd(); 72 | if (_fieldsSet > 1) { 73 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.INVALID_DATA, "Cannot read a TUnion with more than one set value!"); 74 | } 75 | else if (_fieldsSet < 1) { 76 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.INVALID_DATA, "Cannot read a TUnion with no set value!"); 77 | } 78 | if (_returnValue !== null) { 79 | 
return _returnValue; 80 | } 81 | else { 82 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read data for TUnion"); 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "parquets", 3 | "description": "TypeScript implementation of the Parquet file format, based on parquet.js", 4 | "version": "0.10.10", 5 | "upstream": "0.10.1", 6 | "homepage": "https://github.com/kbajalc/parquets", 7 | "author": "kbajalc@gmail.com", 8 | "license": "MIT", 9 | "browser": { 10 | "fs": false 11 | }, 12 | "main": "./lib/index.js", 13 | "types": "./lib/index.d.ts", 14 | "source": "./src/index.ts", 15 | "keywords": [ 16 | "dremel", 17 | "parquet" 18 | ], 19 | "repository": { 20 | "type": "git", 21 | "url": "git://github.com/kbajalc/parquets.git" 22 | }, 23 | "files": [ 24 | "lib", 25 | "src" 26 | ], 27 | "scripts": { 28 | "clean": "rm -rf ./lib && rm -rf ./build", 29 | "build": "npm run clean ; tsc -p src ; tsc -p .", 30 | "watch": "npm run clean ; tsc -p . --watch", 31 | "test": "npm run build && jest --verbose test/*.ts", 32 | "peer": "npm i brotli lzo lz4js --no-save", 33 | "upver": "npm version patch && git push --follow-tags", 34 | "release": "npm run build && git push --follow-tags && npm publish", 35 | "beta": "npm run build && git push --follow-tags && npm publish --tag beta", 36 | "tsgen": "thrift-typescript --target apache --rootDir . --sourceDir . --outDir codegen parquet.thrift", 37 | "tsgencore": "thrift-typescript --target thrift-server --rootDir . --sourceDir . 
--outDir codegen parquet.thrift", 38 | "thrift": "thrift --gen js:node parquet.thrift && thrift --gen js:ts parquet.thrift" 39 | }, 40 | "engines": { 41 | "node": ">=7.6" 42 | }, 43 | "dependencies": { 44 | "bson": "^4.0.2", 45 | "int53": "^1.0.0", 46 | "node-int64": "^0.4.0", 47 | "thrift": "^0.12.0", 48 | "varint": "^5.0.0" 49 | }, 50 | "runtimeDependencies": { 51 | "brotli": "^1.3.2", 52 | "lzo": "^0.4.0", 53 | "lz4js": "^0.2.0" 54 | }, 55 | "devDependencies": { 56 | "@creditkarma/thrift-typescript": "^3.7.2", 57 | "@types/bson": "^4.0.0", 58 | "@types/chai": "^4.1.7", 59 | "@types/debug": "^4.1.4", 60 | "@types/jest": "^24.0.17", 61 | "@types/mocha": "^5.2.7", 62 | "@types/node": "^10.14.15", 63 | "@types/node-int64": "^0.4.29", 64 | "@types/thrift": "^0.10.8", 65 | "@types/varint": "^5.0.0", 66 | "assert": "^2.0.0", 67 | "brotli": "^1.3.2", 68 | "chai": "^4.2.0", 69 | "debug": "^4.1.1", 70 | "jest": "^24.8.0", 71 | "jest-environment-node": "^24.8.0", 72 | "lz4js": "^0.2.0", 73 | "lzo": "^0.4.11", 74 | "object-stream": "0.0.1", 75 | "prettier": "^2.1.2", 76 | "snappy": "^6.3.5", 77 | "ts-jest": "^24.0.2", 78 | "ts-node": "^8.3.0", 79 | "tslint": "^5.18.0", 80 | "tslint-config-airbnb": "^5.11.1", 81 | "typescript": "^3.5.3" 82 | }, 83 | "jest": { 84 | "testEnvironment": "node", 85 | "verbose": true, 86 | "transform": { 87 | "^.+\\.tsx?$": "ts-jest" 88 | }, 89 | "testRegex": "(test/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$", 90 | "testPathIgnorePatterns": [ 91 | "/build_" 92 | ], 93 | "moduleFileExtensions": [ 94 | "ts", 95 | "tsx", 96 | "js", 97 | "jsx", 98 | "json", 99 | "node" 100 | ] 101 | }, 102 | "prettier": { 103 | "arrowParens": "avoid", 104 | "singleQuote": true 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/snappy/index.ts: -------------------------------------------------------------------------------- 1 | import { compressToBuffer, maxCompressedLength } from './compressor'; 2 | import { 
/** Shape of the caller's input, remembered so the result can be returned in the same shape. */
type BinaryKind = 'buffer' | 'uint8array' | 'arraybuffer';

/** Returns true when running under Node.js, i.e. `process.versions.node` is defined. */
function isNode(): boolean {
  return (
    typeof process === 'object' &&
    typeof process.versions === 'object' &&
    typeof process.versions.node !== 'undefined'
  );
}

/** True for a plain `Uint8Array`; a Node `Buffer` (a `Uint8Array` subclass) is excluded. */
function isUint8Array(object: any): boolean {
  return object instanceof Uint8Array && (!isNode() || !Buffer.isBuffer(object));
}

/** True when `object` is an `ArrayBuffer`. */
function isArrayBuffer(object: any): boolean {
  return object instanceof ArrayBuffer;
}

/** True when running under Node.js and `object` is a `Buffer`. */
function isBuffer(object: any): boolean {
  return isNode() && Buffer.isBuffer(object);
}

const TYPE_ERROR_MSG = 'Argument compressed must be type of ArrayBuffer, Buffer, or Uint8Array';
// compress() takes `uncompressed`; it used to report the wrong argument name.
const UNCOMPRESSED_TYPE_ERROR_MSG = 'Argument uncompressed must be type of ArrayBuffer, Buffer, or Uint8Array';

/**
 * Wraps the caller's input in a `Buffer` without copying and reports which
 * shape it had.
 *
 * @throws TypeError with `message` when the input is none of the supported types.
 */
function normalizeInput(input: any, message: string): { buffer: Buffer; kind: BinaryKind } {
  if (isUint8Array(input)) {
    return {
      kind: 'uint8array',
      buffer: Buffer.from(input.buffer, input.byteOffset, input.byteLength)
    };
  }
  if (isArrayBuffer(input)) {
    return { kind: 'arraybuffer', buffer: Buffer.from(input) };
  }
  if (isBuffer(input)) {
    return { kind: 'buffer', buffer: input };
  }
  throw new TypeError(message);
}

/**
 * Returns an `ArrayBuffer` holding exactly the first `length` bytes of `source`.
 * The backing store is returned as-is when it already matches, otherwise the
 * relevant window is copied out (honouring `byteOffset`).
 */
function exactArrayBuffer(source: Buffer, length: number): ArrayBuffer {
  const backing = source.buffer as ArrayBuffer;
  if (source.byteOffset === 0 && backing.byteLength === length) {
    return backing;
  }
  return backing.slice(source.byteOffset, source.byteOffset + length);
}

/**
 * Decompresses a Snappy block. The result has the same type as the input
 * (`Buffer`, `Uint8Array` or `ArrayBuffer`).
 *
 * @throws TypeError when `compressed` is not one of the supported binary types.
 * @throws Error('Invalid Snappy bitstream') when the length header or the body cannot be decoded.
 */
export function uncompress(compressed: Buffer): Buffer;
export function uncompress(compressed: Uint8Array): Uint8Array;
export function uncompress(compressed: ArrayBuffer): ArrayBuffer;
export function uncompress(compressed: any): any {
  const { buffer, kind } = normalizeInput(compressed, TYPE_ERROR_MSG);

  const length = readUncompressedLength(buffer);
  if (length === -1) throw new Error('Invalid Snappy bitstream');
  const target: Buffer = Buffer.alloc(length);

  if (!uncompressToBuffer(buffer, target)) {
    throw new Error('Invalid Snappy bitstream');
  }

  switch (kind) {
    case 'uint8array':
      // Respect the view window instead of assuming the Buffer owns its whole ArrayBuffer.
      return new Uint8Array(target.buffer, target.byteOffset, target.byteLength);
    case 'arraybuffer':
      return exactArrayBuffer(target, target.byteLength);
    default:
      return target;
  }
}

/**
 * Compresses data into a Snappy block. The result has the same type as the
 * input (`Buffer`, `Uint8Array` or `ArrayBuffer`) and is trimmed to the
 * compressed length.
 *
 * @throws TypeError when `uncompressed` is not one of the supported binary types.
 */
export function compress(uncompressed: Buffer): Buffer;
export function compress(uncompressed: Uint8Array): Uint8Array;
export function compress(uncompressed: ArrayBuffer): ArrayBuffer;
export function compress(uncompressed: any): any {
  const { buffer, kind } = normalizeInput(uncompressed, UNCOMPRESSED_TYPE_ERROR_MSG);

  // Allocate for the worst case, then copy out only the bytes actually written.
  const target: Buffer = Buffer.alloc(maxCompressedLength(buffer));
  const length = compressToBuffer(buffer, target);
  const packed = (target.buffer as ArrayBuffer).slice(target.byteOffset, target.byteOffset + length);

  switch (kind) {
    case 'uint8array':
      return new Uint8Array(packed);
    case 'arraybuffer':
      return packed;
    default:
      return Buffer.from(packed);
  }
}
// A bit-packed run always holds a multiple of 8 values, so a 10-value input
// is zero-padded to 16 values on encode and truncated back to 10 on decode.
describe('number of values not a multiple of 8', function () {
  it('should encode bitpacked values', function () {
    const buf = parquet_codec_rle.encodeValues(
      'INT32',
      [0, 1, 2, 3, 4, 5, 6, 7, 6, 5],
      {
        disableEnvelope: true,
        bitWidth: 3
      });

    // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
    assert.deepEqual(buf, Buffer.from([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]));
  });

  it('should decode bitpacked values', function () {
    const vals = parquet_codec_rle.decodeValues(
      'INT32',
      {
        buffer: Buffer.from([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]),
        offset: 0,
      },
      10,
      {
        disableEnvelope: true,
        bitWidth: 3
      });

    assert.deepEqual(vals, [0, 1, 2, 3, 4, 5, 6, 7, 6, 5]);
  });
});
assert.deepEqual(buf, Buffer.from([0x03, 0x88, 0xc6, 0xfa, 0x10, 0x04, 0x03, 0x88, 0xc6, 0xfa])); 104 | }); 105 | 106 | it('should decode mixed runs', function () { 107 | const vals = parquet_codec_rle.decodeValues( 108 | 'INT32', 109 | { 110 | buffer: Buffer.from([0x03, 0x88, 0xc6, 0xfa, 0x10, 0x04, 0x03, 0x88, 0xc6, 0xfa]), 111 | offset: 0, 112 | }, 113 | 24, 114 | { 115 | disableEnvelope: true, 116 | bitWidth: 3 117 | }); 118 | 119 | assert.deepEqual( 120 | vals, 121 | [0, 1, 2, 3, 4, 5, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 0, 1, 2, 3, 4, 5, 6, 7]); 122 | }); 123 | 124 | }); 125 | -------------------------------------------------------------------------------- /src/thrift/DictionaryPageHeader.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | import * as Encoding from "./Encoding"; 9 | export interface IDictionaryPageHeaderArgs { 10 | num_values: number; 11 | encoding: Encoding.Encoding; 12 | is_sorted?: boolean; 13 | } 14 | export class DictionaryPageHeader { 15 | public num_values: number; 16 | public encoding: Encoding.Encoding; 17 | public is_sorted?: boolean; 18 | constructor(args: IDictionaryPageHeaderArgs) { 19 | if (args != null && args.num_values != null) { 20 | this.num_values = args.num_values; 21 | } 22 | else { 23 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[num_values] is unset!"); 24 | } 25 | if (args != null && args.encoding != null) { 26 | this.encoding = args.encoding; 27 | } 28 | else { 29 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[encoding] is unset!"); 30 | } 31 | if (args != null && args.is_sorted != null) { 32 | this.is_sorted = args.is_sorted; 33 | } 34 | 
} 35 | public write(output: thrift.TProtocol): void { 36 | output.writeStructBegin("DictionaryPageHeader"); 37 | if (this.num_values != null) { 38 | output.writeFieldBegin("num_values", thrift.Thrift.Type.I32, 1); 39 | output.writeI32(this.num_values); 40 | output.writeFieldEnd(); 41 | } 42 | if (this.encoding != null) { 43 | output.writeFieldBegin("encoding", thrift.Thrift.Type.I32, 2); 44 | output.writeI32(this.encoding); 45 | output.writeFieldEnd(); 46 | } 47 | if (this.is_sorted != null) { 48 | output.writeFieldBegin("is_sorted", thrift.Thrift.Type.BOOL, 3); 49 | output.writeBool(this.is_sorted); 50 | output.writeFieldEnd(); 51 | } 52 | output.writeFieldStop(); 53 | output.writeStructEnd(); 54 | return; 55 | } 56 | public static read(input: thrift.TProtocol): DictionaryPageHeader { 57 | input.readStructBegin(); 58 | let _args: any = {}; 59 | while (true) { 60 | const ret: thrift.TField = input.readFieldBegin(); 61 | const fieldType: thrift.Thrift.Type = ret.ftype; 62 | const fieldId: number = ret.fid; 63 | if (fieldType === thrift.Thrift.Type.STOP) { 64 | break; 65 | } 66 | switch (fieldId) { 67 | case 1: 68 | if (fieldType === thrift.Thrift.Type.I32) { 69 | const value_1: number = input.readI32(); 70 | _args.num_values = value_1; 71 | } 72 | else { 73 | input.skip(fieldType); 74 | } 75 | break; 76 | case 2: 77 | if (fieldType === thrift.Thrift.Type.I32) { 78 | const value_2: Encoding.Encoding = input.readI32(); 79 | _args.encoding = value_2; 80 | } 81 | else { 82 | input.skip(fieldType); 83 | } 84 | break; 85 | case 3: 86 | if (fieldType === thrift.Thrift.Type.BOOL) { 87 | const value_3: boolean = input.readBool(); 88 | _args.is_sorted = value_3; 89 | } 90 | else { 91 | input.skip(fieldType); 92 | } 93 | break; 94 | default: { 95 | input.skip(fieldType); 96 | } 97 | } 98 | input.readFieldEnd(); 99 | } 100 | input.readStructEnd(); 101 | if (_args.num_values !== undefined && _args.encoding !== undefined) { 102 | return new DictionaryPageHeader(_args); 103 | } 
104 | else { 105 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read DictionaryPageHeader from input"); 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/thrift/SortingColumn.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | export interface ISortingColumnArgs { 9 | column_idx: number; 10 | descending: boolean; 11 | nulls_first: boolean; 12 | } 13 | export class SortingColumn { 14 | public column_idx: number; 15 | public descending: boolean; 16 | public nulls_first: boolean; 17 | constructor(args: ISortingColumnArgs) { 18 | if (args != null && args.column_idx != null) { 19 | this.column_idx = args.column_idx; 20 | } 21 | else { 22 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[column_idx] is unset!"); 23 | } 24 | if (args != null && args.descending != null) { 25 | this.descending = args.descending; 26 | } 27 | else { 28 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[descending] is unset!"); 29 | } 30 | if (args != null && args.nulls_first != null) { 31 | this.nulls_first = args.nulls_first; 32 | } 33 | else { 34 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[nulls_first] is unset!"); 35 | } 36 | } 37 | public write(output: thrift.TProtocol): void { 38 | output.writeStructBegin("SortingColumn"); 39 | if (this.column_idx != null) { 40 | output.writeFieldBegin("column_idx", thrift.Thrift.Type.I32, 1); 41 | output.writeI32(this.column_idx); 42 | output.writeFieldEnd(); 43 | } 44 | if (this.descending != 
null) { 45 | output.writeFieldBegin("descending", thrift.Thrift.Type.BOOL, 2); 46 | output.writeBool(this.descending); 47 | output.writeFieldEnd(); 48 | } 49 | if (this.nulls_first != null) { 50 | output.writeFieldBegin("nulls_first", thrift.Thrift.Type.BOOL, 3); 51 | output.writeBool(this.nulls_first); 52 | output.writeFieldEnd(); 53 | } 54 | output.writeFieldStop(); 55 | output.writeStructEnd(); 56 | return; 57 | } 58 | public static read(input: thrift.TProtocol): SortingColumn { 59 | input.readStructBegin(); 60 | let _args: any = {}; 61 | while (true) { 62 | const ret: thrift.TField = input.readFieldBegin(); 63 | const fieldType: thrift.Thrift.Type = ret.ftype; 64 | const fieldId: number = ret.fid; 65 | if (fieldType === thrift.Thrift.Type.STOP) { 66 | break; 67 | } 68 | switch (fieldId) { 69 | case 1: 70 | if (fieldType === thrift.Thrift.Type.I32) { 71 | const value_1: number = input.readI32(); 72 | _args.column_idx = value_1; 73 | } 74 | else { 75 | input.skip(fieldType); 76 | } 77 | break; 78 | case 2: 79 | if (fieldType === thrift.Thrift.Type.BOOL) { 80 | const value_2: boolean = input.readBool(); 81 | _args.descending = value_2; 82 | } 83 | else { 84 | input.skip(fieldType); 85 | } 86 | break; 87 | case 3: 88 | if (fieldType === thrift.Thrift.Type.BOOL) { 89 | const value_3: boolean = input.readBool(); 90 | _args.nulls_first = value_3; 91 | } 92 | else { 93 | input.skip(fieldType); 94 | } 95 | break; 96 | default: { 97 | input.skip(fieldType); 98 | } 99 | } 100 | input.readFieldEnd(); 101 | } 102 | input.readStructEnd(); 103 | if (_args.column_idx !== undefined && _args.descending !== undefined && _args.nulls_first !== undefined) { 104 | return new SortingColumn(_args); 105 | } 106 | else { 107 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read SortingColumn from input"); 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- 
/src/thrift/PageEncodingStats.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import * as thrift from "thrift"; 8 | import * as Encoding from "./Encoding"; 9 | import * as PageType from "./PageType"; 10 | export interface IPageEncodingStatsArgs { 11 | page_type: PageType.PageType; 12 | encoding: Encoding.Encoding; 13 | count: number; 14 | } 15 | export class PageEncodingStats { 16 | public page_type: PageType.PageType; 17 | public encoding: Encoding.Encoding; 18 | public count: number; 19 | constructor(args: IPageEncodingStatsArgs) { 20 | if (args != null && args.page_type != null) { 21 | this.page_type = args.page_type; 22 | } 23 | else { 24 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[page_type] is unset!"); 25 | } 26 | if (args != null && args.encoding != null) { 27 | this.encoding = args.encoding; 28 | } 29 | else { 30 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[encoding] is unset!"); 31 | } 32 | if (args != null && args.count != null) { 33 | this.count = args.count; 34 | } 35 | else { 36 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[count] is unset!"); 37 | } 38 | } 39 | public write(output: thrift.TProtocol): void { 40 | output.writeStructBegin("PageEncodingStats"); 41 | if (this.page_type != null) { 42 | output.writeFieldBegin("page_type", thrift.Thrift.Type.I32, 1); 43 | output.writeI32(this.page_type); 44 | output.writeFieldEnd(); 45 | } 46 | if (this.encoding != null) { 47 | output.writeFieldBegin("encoding", thrift.Thrift.Type.I32, 2); 48 | output.writeI32(this.encoding); 49 | output.writeFieldEnd(); 50 | } 51 | if (this.count != null) { 
52 | output.writeFieldBegin("count", thrift.Thrift.Type.I32, 3); 53 | output.writeI32(this.count); 54 | output.writeFieldEnd(); 55 | } 56 | output.writeFieldStop(); 57 | output.writeStructEnd(); 58 | return; 59 | } 60 | public static read(input: thrift.TProtocol): PageEncodingStats { 61 | input.readStructBegin(); 62 | let _args: any = {}; 63 | while (true) { 64 | const ret: thrift.TField = input.readFieldBegin(); 65 | const fieldType: thrift.Thrift.Type = ret.ftype; 66 | const fieldId: number = ret.fid; 67 | if (fieldType === thrift.Thrift.Type.STOP) { 68 | break; 69 | } 70 | switch (fieldId) { 71 | case 1: 72 | if (fieldType === thrift.Thrift.Type.I32) { 73 | const value_1: PageType.PageType = input.readI32(); 74 | _args.page_type = value_1; 75 | } 76 | else { 77 | input.skip(fieldType); 78 | } 79 | break; 80 | case 2: 81 | if (fieldType === thrift.Thrift.Type.I32) { 82 | const value_2: Encoding.Encoding = input.readI32(); 83 | _args.encoding = value_2; 84 | } 85 | else { 86 | input.skip(fieldType); 87 | } 88 | break; 89 | case 3: 90 | if (fieldType === thrift.Thrift.Type.I32) { 91 | const value_3: number = input.readI32(); 92 | _args.count = value_3; 93 | } 94 | else { 95 | input.skip(fieldType); 96 | } 97 | break; 98 | default: { 99 | input.skip(fieldType); 100 | } 101 | } 102 | input.readFieldEnd(); 103 | } 104 | input.readStructEnd(); 105 | if (_args.page_type !== undefined && _args.encoding !== undefined && _args.count !== undefined) { 106 | return new PageEncodingStats(_args); 107 | } 108 | else { 109 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read PageEncodingStats from input"); 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/compression.ts: -------------------------------------------------------------------------------- 1 | import { ParquetCompression } from './declare'; 2 | import * as Util from './util'; 3 | import zlib = 
// Optional codecs are resolved through Util.load() on first use and cached here.
let brotli: any;
let lzo: any;
let lz4js: any;

/** Synchronous compress/decompress pair for one Parquet compression codec. */
export interface ParquetCompressionKit {
  /** Compresses `value` and returns the compressed bytes. */
  deflate: (value: Buffer) => Buffer;
  /** Decompresses `value`; `size` is the expected uncompressed size (only some codecs use it). */
  inflate: (value: Buffer, size: number) => Buffer;
}

/** Codec table keyed by Parquet compression name. */
export const PARQUET_COMPRESSION_METHODS: Record<ParquetCompression, ParquetCompressionKit> = {
  UNCOMPRESSED: {
    deflate: deflate_identity,
    inflate: inflate_identity
  },
  GZIP: {
    deflate: deflate_gzip,
    inflate: inflate_gzip
  },
  SNAPPY: {
    deflate: deflate_snappy,
    inflate: inflate_snappy
  },
  LZO: {
    deflate: deflate_lzo,
    inflate: inflate_lzo
  },
  BROTLI: {
    deflate: deflate_brotli,
    inflate: inflate_brotli
  },
  LZ4: {
    deflate: deflate_lz4,
    inflate: inflate_lz4
  }
};

/**
 * Looks up the codec for `method`.
 *
 * Uses an own-property check: the `in` operator also matches inherited keys
 * such as `toString`, which would slip past validation and fail later with an
 * unrelated "is not a function" TypeError.
 *
 * @throws Error('invalid compression method: …') for unknown methods.
 */
function compressionKit(method: ParquetCompression): ParquetCompressionKit {
  if (!Object.prototype.hasOwnProperty.call(PARQUET_COMPRESSION_METHODS, method)) {
    throw new Error('invalid compression method: ' + method);
  }
  return PARQUET_COMPRESSION_METHODS[method];
}

/**
 * Deflate a value using compression method `method`
 *
 * @throws Error when `method` is not a key of PARQUET_COMPRESSION_METHODS.
 */
export function deflate(method: ParquetCompression, value: Buffer): Buffer {
  return compressionKit(method).deflate(value);
}

function deflate_identity(value: Buffer): Buffer {
  return value;
}

function deflate_gzip(value: Buffer): Buffer {
  return zlib.gzipSync(value);
}

function deflate_snappy(value: Buffer): Buffer {
  return snappyjs.compress(value);
}

function deflate_lzo(value: Buffer): Buffer {
  lzo = lzo || Util.load('lzo');
  return lzo.compress(value);
}

function deflate_brotli(value: Buffer): Buffer {
  brotli = brotli || Util.load('brotli');
  const result = brotli.compress(value, {
    mode: 0,
    quality: 8,
    lgwin: 22
  });
  // A falsy result from the codec is mapped to an empty buffer.
  return result ? Buffer.from(result) : Buffer.alloc(0);
}

function deflate_lz4(value: Buffer): Buffer {
  lz4js = lz4js || Util.load('lz4js');
  return Buffer.from(lz4js.compress(value));
}

/**
 * Inflate a value using compression method `method`
 *
 * @param size expected uncompressed size, forwarded to the codec.
 * @throws Error when `method` is not a key of PARQUET_COMPRESSION_METHODS.
 */
export function inflate(method: ParquetCompression, value: Buffer, size: number): Buffer {
  return compressionKit(method).inflate(value, size);
}

function inflate_identity(value: Buffer): Buffer {
  return value;
}

function inflate_gzip(value: Buffer): Buffer {
  return zlib.gunzipSync(value);
}

function inflate_snappy(value: Buffer): Buffer {
  return snappyjs.uncompress(value);
}

function inflate_lzo(value: Buffer, size: number): Buffer {
  lzo = lzo || Util.load('lzo');
  return lzo.decompress(value, size);
}

function inflate_lz4(value: Buffer, size: number): Buffer {
  lz4js = lz4js || Util.load('lz4js');
  return Buffer.from(lz4js.decompress(value, size));
}

function inflate_brotli(value: Buffer): Buffer {
  brotli = brotli || Util.load('brotli');
  // Mirrors deflate_brotli, which emits an empty buffer for an empty result.
  if (!value.length) {
    return Buffer.alloc(0);
  }
  return Buffer.from(brotli.decompress(value));
}
49 | if (this.MICROS != null) { 50 | output.writeFieldBegin("MICROS", thrift.Thrift.Type.STRUCT, 2); 51 | this.MICROS.write(output); 52 | output.writeFieldEnd(); 53 | } 54 | output.writeFieldStop(); 55 | output.writeStructEnd(); 56 | return; 57 | } 58 | public static read(input: thrift.TProtocol): TimeUnit { 59 | let _fieldsSet: number = 0; 60 | let _returnValue: TimeUnit | null = null; 61 | input.readStructBegin(); 62 | while (true) { 63 | const ret: thrift.TField = input.readFieldBegin(); 64 | const fieldType: thrift.Thrift.Type = ret.ftype; 65 | const fieldId: number = ret.fid; 66 | if (fieldType === thrift.Thrift.Type.STOP) { 67 | break; 68 | } 69 | switch (fieldId) { 70 | case 1: 71 | if (fieldType === thrift.Thrift.Type.STRUCT) { 72 | _fieldsSet++; 73 | const value_1: MilliSeconds.MilliSeconds = MilliSeconds.MilliSeconds.read(input); 74 | _returnValue = TimeUnit.fromMILLIS(value_1); 75 | } 76 | else { 77 | input.skip(fieldType); 78 | } 79 | break; 80 | case 2: 81 | if (fieldType === thrift.Thrift.Type.STRUCT) { 82 | _fieldsSet++; 83 | const value_2: MicroSeconds.MicroSeconds = MicroSeconds.MicroSeconds.read(input); 84 | _returnValue = TimeUnit.fromMICROS(value_2); 85 | } 86 | else { 87 | input.skip(fieldType); 88 | } 89 | break; 90 | default: { 91 | input.skip(fieldType); 92 | } 93 | } 94 | input.readFieldEnd(); 95 | } 96 | input.readStructEnd(); 97 | if (_fieldsSet > 1) { 98 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.INVALID_DATA, "Cannot read a TUnion with more than one set value!"); 99 | } 100 | else if (_fieldsSet < 1) { 101 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.INVALID_DATA, "Cannot read a TUnion with no set value!"); 102 | } 103 | if (_returnValue !== null) { 104 | return _returnValue; 105 | } 106 | else { 107 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read data for TUnion"); 108 | } 109 | } 110 | } 111 | 
-------------------------------------------------------------------------------- /src/thrift/PageLocation.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import Int64 = require("node-int64"); 8 | import * as thrift from "thrift"; 9 | export interface IPageLocationArgs { 10 | offset: number | Int64; 11 | compressed_page_size: number; 12 | first_row_index: number | Int64; 13 | } 14 | export class PageLocation { 15 | public offset: Int64; 16 | public compressed_page_size: number; 17 | public first_row_index: Int64; 18 | constructor(args: IPageLocationArgs) { 19 | if (args != null && args.offset != null) { 20 | if (typeof args.offset === "number") { 21 | this.offset = new Int64(args.offset); 22 | } 23 | else { 24 | this.offset = args.offset; 25 | } 26 | } 27 | else { 28 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[offset] is unset!"); 29 | } 30 | if (args != null && args.compressed_page_size != null) { 31 | this.compressed_page_size = args.compressed_page_size; 32 | } 33 | else { 34 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[compressed_page_size] is unset!"); 35 | } 36 | if (args != null && args.first_row_index != null) { 37 | if (typeof args.first_row_index === "number") { 38 | this.first_row_index = new Int64(args.first_row_index); 39 | } 40 | else { 41 | this.first_row_index = args.first_row_index; 42 | } 43 | } 44 | else { 45 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[first_row_index] is unset!"); 46 | } 47 | } 48 | public write(output: thrift.TProtocol): void { 49 | output.writeStructBegin("PageLocation"); 50 | if (this.offset != null) { 51 | 
output.writeFieldBegin("offset", thrift.Thrift.Type.I64, 1); 52 | output.writeI64(this.offset); 53 | output.writeFieldEnd(); 54 | } 55 | if (this.compressed_page_size != null) { 56 | output.writeFieldBegin("compressed_page_size", thrift.Thrift.Type.I32, 2); 57 | output.writeI32(this.compressed_page_size); 58 | output.writeFieldEnd(); 59 | } 60 | if (this.first_row_index != null) { 61 | output.writeFieldBegin("first_row_index", thrift.Thrift.Type.I64, 3); 62 | output.writeI64(this.first_row_index); 63 | output.writeFieldEnd(); 64 | } 65 | output.writeFieldStop(); 66 | output.writeStructEnd(); 67 | return; 68 | } 69 | public static read(input: thrift.TProtocol): PageLocation { 70 | input.readStructBegin(); 71 | let _args: any = {}; 72 | while (true) { 73 | const ret: thrift.TField = input.readFieldBegin(); 74 | const fieldType: thrift.Thrift.Type = ret.ftype; 75 | const fieldId: number = ret.fid; 76 | if (fieldType === thrift.Thrift.Type.STOP) { 77 | break; 78 | } 79 | switch (fieldId) { 80 | case 1: 81 | if (fieldType === thrift.Thrift.Type.I64) { 82 | const value_1: Int64 = input.readI64(); 83 | _args.offset = value_1; 84 | } 85 | else { 86 | input.skip(fieldType); 87 | } 88 | break; 89 | case 2: 90 | if (fieldType === thrift.Thrift.Type.I32) { 91 | const value_2: number = input.readI32(); 92 | _args.compressed_page_size = value_2; 93 | } 94 | else { 95 | input.skip(fieldType); 96 | } 97 | break; 98 | case 3: 99 | if (fieldType === thrift.Thrift.Type.I64) { 100 | const value_3: Int64 = input.readI64(); 101 | _args.first_row_index = value_3; 102 | } 103 | else { 104 | input.skip(fieldType); 105 | } 106 | break; 107 | default: { 108 | input.skip(fieldType); 109 | } 110 | } 111 | input.readFieldEnd(); 112 | } 113 | input.readStructEnd(); 114 | if (_args.offset !== undefined && _args.compressed_page_size !== undefined && _args.first_row_index !== undefined) { 115 | return new PageLocation(_args); 116 | } 117 | else { 118 | throw new 
thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read PageLocation from input"); 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /test/dremel.ts: -------------------------------------------------------------------------------- 1 | import chai = require('chai'); 2 | const assert = chai.assert; 3 | import parquet = require('../src'); 4 | import { ParquetBuffer } from '../src/declare'; 5 | 6 | // tslint:disable:ter-prefer-arrow-callback 7 | describe('ParquetShredder', function () { 8 | it('should shred Dremel example', function () { 9 | const schema = new parquet.ParquetSchema({ 10 | DocId: { type: 'INT64' }, 11 | Links: { 12 | optional: true, 13 | fields: { 14 | Backward: { 15 | repeated: true, 16 | type: 'INT64' 17 | }, 18 | Forward: { 19 | repeated: true, 20 | type: 'INT64' 21 | } 22 | } 23 | }, 24 | Name: { 25 | repeated: true, 26 | fields: { 27 | Language: { 28 | repeated: true, 29 | fields: { 30 | Code: { type: 'UTF8' }, 31 | Country: { type: 'UTF8', optional: true } 32 | } 33 | }, 34 | Url: { type: 'UTF8', optional: true } 35 | } 36 | } 37 | }); 38 | 39 | const r1 = { 40 | DocId: 10, 41 | Links: { 42 | Forward: [20, 40, 60] 43 | }, 44 | Name: [ 45 | { 46 | Language: [ 47 | { Code: 'en-us', Country: 'us' }, 48 | { Code: 'en' } 49 | ], 50 | Url: 'http://A' 51 | }, 52 | { 53 | Url: 'http://B' 54 | }, 55 | { 56 | Language: [ 57 | { Code: 'en-gb', Country: 'gb' } 58 | ] 59 | } 60 | ] 61 | }; 62 | 63 | const r2 = { 64 | DocId: 20, 65 | Links: { 66 | Backward: [10, 30], 67 | Forward: [80] 68 | }, 69 | Name: [ 70 | { 71 | Url: 'http://C' 72 | } 73 | ] 74 | }; 75 | 76 | const buffer: ParquetBuffer = {}; 77 | schema.shredRecord(r1, buffer); 78 | schema.shredRecord(r2, buffer); 79 | 80 | assert.equal(buffer.rowCount, 2); 81 | { 82 | const c = buffer.columnData[['DocId'].join()]; 83 | assert.deepEqual(c.rlevels, [0, 0]); 84 | assert.deepEqual(c.dlevels, [0, 0]); 85 | 
assert.deepEqual(c.values, [10, 20]); 86 | } 87 | { 88 | const c = buffer.columnData[['Links', 'Forward'].join()]; 89 | assert.deepEqual(c.rlevels, [0, 1, 1, 0]); 90 | assert.deepEqual(c.dlevels, [2, 2, 2, 2]); 91 | assert.deepEqual(c.values, [20, 40, 60, 80]); 92 | } 93 | { 94 | const c = buffer.columnData[['Links', 'Backward'].join()]; 95 | assert.deepEqual(c.rlevels, [0, 0, 1]); 96 | assert.deepEqual(c.dlevels, [1, 2, 2]); 97 | assert.deepEqual(c.values, [10, 30]); 98 | } 99 | { 100 | const c = buffer.columnData[['Name', 'Url'].join()]; 101 | assert.deepEqual(c.rlevels, [0, 1, 1, 0]); 102 | assert.deepEqual(c.dlevels, [2, 2, 1, 2]); 103 | assert.deepEqual(c.values.map(v => v.toString()), ['http://A', 'http://B', 'http://C']); 104 | } 105 | { 106 | const c = buffer.columnData[['Name', 'Language', 'Code'].join()]; 107 | assert.deepEqual(c.rlevels, [0, 2, 1, 1, 0]); 108 | assert.deepEqual(c.dlevels, [2, 2, 1, 2, 1]); 109 | assert.deepEqual(c.values.map(v => v.toString()), ['en-us', 'en', 'en-gb']); 110 | } 111 | { 112 | const c = buffer.columnData[['Name', 'Language', 'Country'].join()]; 113 | assert.deepEqual(c.rlevels, [0, 2, 1, 1, 0]); 114 | assert.deepEqual(c.dlevels, [3, 2, 1, 3, 1]); 115 | assert.deepEqual(c.values.map(v => v.toString()), ['us', 'gb']); 116 | } 117 | 118 | const records = schema.materializeRecords(buffer); 119 | assert.deepEqual(records[0], r1); 120 | assert.deepEqual(records[1], r2); 121 | }); 122 | 123 | it('should shred a optional nested record with blank optional value', function () { 124 | const schema = new parquet.ParquetSchema({ 125 | fruit: { 126 | optional: true, 127 | fields: { 128 | color: { type: 'UTF8', repeated: true }, 129 | type: { type: 'UTF8', optional: true } 130 | } 131 | } 132 | }); 133 | 134 | const buffer: ParquetBuffer = {}; 135 | schema.shredRecord({}, buffer); 136 | schema.shredRecord({ fruit: {} }, buffer); 137 | schema.shredRecord({ fruit: { color: [] } }, buffer); 138 | schema.shredRecord({ fruit: { color: 
/**
 * Encode `values` as one bit-packed run of the Parquet RLE / bit-packing
 * hybrid encoding.
 *
 * The run is zero-padded to a multiple of 8 values, the low `opts.bitWidth`
 * bits of every value are packed LSB-first, and the result is prefixed with
 * the varint header `(groups << 1) | 1`, where `groups` is the number of
 * 8-value groups.
 *
 * @param values integers to pack; the array itself is left untouched
 * @param opts codec options, `bitWidth` is the number of bits per value
 * @returns header followed by the packed bytes
 */
function encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {
  // Pad a copy: the previous implementation grew the caller's array in place.
  const padded = values.slice();
  while (padded.length % 8 !== 0) {
    padded.push(0);
  }

  const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (padded.length / 8)));
  for (let b = 0; b < opts.bitWidth * padded.length; b++) {
    // `!== 0` instead of `> 0`: the mask `1 << 31` is negative, so a `> 0`
    // test silently dropped the top bit when bitWidth is 32.
    if ((padded[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) !== 0) {
      buf[Math.floor(b / 8)] |= (1 << (b % 8));
    }
  }

  return Buffer.concat([
    Buffer.from(varint.encode(((padded.length / 8) << 1) | 1)),
    buf
  ]);
}

/**
 * Encode a repeated (RLE) run: varint header `count << 1` followed by the
 * value stored little-endian in `ceil(bitWidth / 8)` bytes.
 *
 * @param value the repeated integer
 * @param count how many times it repeats
 * @param opts codec options, `bitWidth` selects the stored byte width
 * @returns header followed by the value bytes
 */
function encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {
  const buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8));

  // The original `value >> 8;` discarded its result, so every byte received
  // the same low byte. Shift a local copy so each byte gets its own slice.
  let rest = value;
  for (let i = 0; i < buf.length; i++) {
    buf.writeUInt8(rest & 0xff, i);
    rest >>>= 8;
  }

  return Buffer.concat([
    Buffer.from(varint.encode(count << 1)),
    buf
  ]);
}

/**
 * Encode integer-like values with the RLE / bit-packing hybrid encoding.
 *
 * A repeated run is started only on an 8-value boundary of the pending
 * bit-packed run, when the current value equals the next one; everything else
 * is collected into bit-packed runs.
 *
 * @param type primitive type of the values; BOOLEAN, INT32 and INT64 are accepted
 * @param values values to encode; booleans map to 1/0, anything else goes
 *               through `parseInt(x, 10)`
 * @param opts `bitWidth` is required; `disableEnvelope` omits the 4-byte
 *             little-endian length prefix
 * @returns the encoded runs, length-prefixed unless `disableEnvelope` is set
 * @throws Error when `bitWidth` is missing or `type` is unsupported
 */
export function encodeValues(type: PrimitiveType, values: any[], opts: ParquetCodecOptions): Buffer {
  if (!('bitWidth' in opts)) {
    throw new Error('bitWidth is required');
  }

  let ints: number[];
  switch (type) {

    case 'BOOLEAN':
    case 'INT32':
    case 'INT64':
      // parseInt(true, 10) is NaN, which used to encode real booleans as 0.
      ints = values.map(x => (typeof x === 'boolean' ? Number(x) : parseInt(x, 10)));
      break;

    default:
      throw new Error(`unsupported type: ${type}`);
  }

  // Collect the runs and concatenate once instead of re-copying per run.
  const chunks: Buffer[] = [];
  let run: number[] = [];
  let repeats = 0;

  for (let i = 0; i < ints.length; i++) {
    // If we are at the beginning of a run and the next value is the same we
    // start collecting repeated values
    if (repeats === 0 && run.length % 8 === 0 && ints[i] === ints[i + 1]) {
      // Any pending bit-packed data has to be written first
      if (run.length) {
        chunks.push(encodeRunBitpacked(run, opts));
        run = [];
      }
      repeats = 1;
    } else if (repeats > 0 && ints[i] === ints[i - 1]) {
      repeats += 1;
    } else {
      // The value changed: flush the repeated run that just ended
      if (repeats) {
        chunks.push(encodeRunRepeated(ints[i - 1], repeats, opts));
        repeats = 0;
      }
      run.push(ints[i]);
    }
  }

  if (repeats) {
    chunks.push(encodeRunRepeated(ints[ints.length - 1], repeats, opts));
  } else if (run.length) {
    chunks.push(encodeRunBitpacked(run, opts));
  }

  const buf = Buffer.concat(chunks);
  if (opts.disableEnvelope) {
    return buf;
  }

  const envelope = Buffer.alloc(buf.length + 4);
  envelope.writeUInt32LE(buf.length, 0);
  buf.copy(envelope, 4);

  return envelope;
}

/**
 * Decode one bit-packed run of `count` values starting at `cursor.offset`
 * and advance the cursor past it.
 *
 * @param cursor buffer plus read position; `offset` is moved forward
 * @param count number of values in the run, must be a multiple of 8
 * @param opts codec options, `bitWidth` is the number of bits per value
 * @throws Error when `count` is not a multiple of 8
 */
function decodeRunBitpacked(cursor: CursorBuffer, count: number, opts: ParquetCodecOptions): number[] {
  if (count % 8 !== 0) {
    throw new Error('must be a multiple of 8');
  }

  // tslint:disable-next-line:prefer-array-literal
  const values = new Array(count).fill(0);
  for (let b = 0; b < opts.bitWidth * count; b++) {
    if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) {
      values[Math.floor(b / opts.bitWidth)] |= (1 << b % opts.bitWidth);
    }
  }

  cursor.offset += opts.bitWidth * (count / 8);
  return values;
}

/**
 * Decode one repeated (RLE) run: read the little-endian value stored in
 * `ceil(bitWidth / 8)` bytes at `cursor.offset` and repeat it `count` times.
 *
 * @param cursor buffer plus read position; `offset` is moved forward
 * @param count number of repetitions to produce
 * @param opts codec options, `bitWidth` selects the stored byte width
 */
function decodeRunRepeated(cursor: CursorBuffer, count: number, opts: ParquetCodecOptions): number[] {
  const byteWidth = Math.ceil(opts.bitWidth / 8);

  // The original `value << 8;` discarded its result, so the bytes were simply
  // summed. Place each byte at its own position, least significant first.
  let value = 0;
  for (let i = 0; i < byteWidth; i++) {
    value |= cursor.buffer[cursor.offset] << (8 * i);
    cursor.offset += 1;
  }

  // tslint:disable-next-line:prefer-array-literal
  return new Array(count).fill(value);
}

/**
 * Decode `count` values of the RLE / bit-packing hybrid encoding from
 * `cursor`, advancing `cursor.offset` past every run that was read.
 *
 * @param type primitive type of the column (not consulted by the decoder)
 * @param cursor buffer plus read position
 * @param count number of values to return
 * @param opts `bitWidth` is required; unless `disableEnvelope` is set the
 *             4-byte length prefix is skipped first
 * @returns exactly `count` integers
 * @throws Error when `bitWidth` is missing or `count` values cannot be produced
 */
export function decodeValues(type: PrimitiveType, cursor: CursorBuffer, count: number, opts: ParquetCodecOptions): number[] {
  if (!('bitWidth' in opts)) {
    throw new Error('bitWidth is required');
  }

  if (!opts.disableEnvelope) {
    cursor.offset += 4;
  }

  const values: number[] = [];
  while (values.length < count) {
    const header = varint.decode(cursor.buffer, cursor.offset);
    cursor.offset += varint.encodingLength(header);

    // Lowest header bit selects the run kind, the rest is its length.
    const run = (header & 1)
      ? decodeRunBitpacked(cursor, (header >> 1) * 8, opts)
      : decodeRunRepeated(cursor, header >> 1, opts);

    // Append element by element: `values.push(...run)` passes every element
    // as a call argument and overflows the stack on long runs.
    for (let i = 0; i < run.length && values.length < count; i++) {
      values.push(run[i]);
    }
  }

  if (values.length !== count) {
    throw new Error('invalid RLE encoding');
  }

  return values;
}
number | Int64; 14 | max_value?: Buffer; 15 | min_value?: Buffer; 16 | } 17 | export class Statistics { 18 | public max?: Buffer; 19 | public min?: Buffer; 20 | public null_count?: Int64; 21 | public distinct_count?: Int64; 22 | public max_value?: Buffer; 23 | public min_value?: Buffer; 24 | constructor(args?: IStatisticsArgs) { 25 | if (args != null && args.max != null) { 26 | this.max = args.max; 27 | } 28 | if (args != null && args.min != null) { 29 | this.min = args.min; 30 | } 31 | if (args != null && args.null_count != null) { 32 | if (typeof args.null_count === "number") { 33 | this.null_count = new Int64(args.null_count); 34 | } 35 | else { 36 | this.null_count = args.null_count; 37 | } 38 | } 39 | if (args != null && args.distinct_count != null) { 40 | if (typeof args.distinct_count === "number") { 41 | this.distinct_count = new Int64(args.distinct_count); 42 | } 43 | else { 44 | this.distinct_count = args.distinct_count; 45 | } 46 | } 47 | if (args != null && args.max_value != null) { 48 | this.max_value = args.max_value; 49 | } 50 | if (args != null && args.min_value != null) { 51 | this.min_value = args.min_value; 52 | } 53 | } 54 | public write(output: thrift.TProtocol): void { 55 | output.writeStructBegin("Statistics"); 56 | if (this.max != null) { 57 | output.writeFieldBegin("max", thrift.Thrift.Type.STRING, 1); 58 | output.writeBinary(this.max); 59 | output.writeFieldEnd(); 60 | } 61 | if (this.min != null) { 62 | output.writeFieldBegin("min", thrift.Thrift.Type.STRING, 2); 63 | output.writeBinary(this.min); 64 | output.writeFieldEnd(); 65 | } 66 | if (this.null_count != null) { 67 | output.writeFieldBegin("null_count", thrift.Thrift.Type.I64, 3); 68 | output.writeI64(this.null_count); 69 | output.writeFieldEnd(); 70 | } 71 | if (this.distinct_count != null) { 72 | output.writeFieldBegin("distinct_count", thrift.Thrift.Type.I64, 4); 73 | output.writeI64(this.distinct_count); 74 | output.writeFieldEnd(); 75 | } 76 | if (this.max_value != null) { 77 | 
output.writeFieldBegin("max_value", thrift.Thrift.Type.STRING, 5); 78 | output.writeBinary(this.max_value); 79 | output.writeFieldEnd(); 80 | } 81 | if (this.min_value != null) { 82 | output.writeFieldBegin("min_value", thrift.Thrift.Type.STRING, 6); 83 | output.writeBinary(this.min_value); 84 | output.writeFieldEnd(); 85 | } 86 | output.writeFieldStop(); 87 | output.writeStructEnd(); 88 | return; 89 | } 90 | public static read(input: thrift.TProtocol): Statistics { 91 | input.readStructBegin(); 92 | let _args: any = {}; 93 | while (true) { 94 | const ret: thrift.TField = input.readFieldBegin(); 95 | const fieldType: thrift.Thrift.Type = ret.ftype; 96 | const fieldId: number = ret.fid; 97 | if (fieldType === thrift.Thrift.Type.STOP) { 98 | break; 99 | } 100 | switch (fieldId) { 101 | case 1: 102 | if (fieldType === thrift.Thrift.Type.STRING) { 103 | const value_1: Buffer = input.readBinary(); 104 | _args.max = value_1; 105 | } 106 | else { 107 | input.skip(fieldType); 108 | } 109 | break; 110 | case 2: 111 | if (fieldType === thrift.Thrift.Type.STRING) { 112 | const value_2: Buffer = input.readBinary(); 113 | _args.min = value_2; 114 | } 115 | else { 116 | input.skip(fieldType); 117 | } 118 | break; 119 | case 3: 120 | if (fieldType === thrift.Thrift.Type.I64) { 121 | const value_3: Int64 = input.readI64(); 122 | _args.null_count = value_3; 123 | } 124 | else { 125 | input.skip(fieldType); 126 | } 127 | break; 128 | case 4: 129 | if (fieldType === thrift.Thrift.Type.I64) { 130 | const value_4: Int64 = input.readI64(); 131 | _args.distinct_count = value_4; 132 | } 133 | else { 134 | input.skip(fieldType); 135 | } 136 | break; 137 | case 5: 138 | if (fieldType === thrift.Thrift.Type.STRING) { 139 | const value_5: Buffer = input.readBinary(); 140 | _args.max_value = value_5; 141 | } 142 | else { 143 | input.skip(fieldType); 144 | } 145 | break; 146 | case 6: 147 | if (fieldType === thrift.Thrift.Type.STRING) { 148 | const value_6: Buffer = input.readBinary(); 149 | 
/**
 * Normalise a field path given either as a comma separated string or as an
 * array of names. Always returns a fresh array so callers' arrays are never
 * modified.
 */
function toPathArray(path: string | string[]): string[] {
  return Array.isArray(path) ? path.slice() : path.split(',');
}

/**
 * A parquet file schema
 */
export class ParquetSchema {
  /** The schema definition passed to the constructor (defaults are filled in while building). */
  public schema: Record<string, FieldDefinition>;
  /** Field tree keyed by field name; nested fields hang off `fields`. */
  public fields: Record<string, ParquetField>;
  /** Every field of the tree, parents before their children. */
  public fieldList: ParquetField[];

  /**
   * Create a new schema from a JSON schema definition
   *
   * @throws Error for an unknown type, encoding or compression method
   */
  constructor(schema: SchemaDefinition) {
    this.schema = schema;
    this.fields = buildFields(schema, 0, 0, []);
    this.fieldList = listFields(this.fields);
  }

  /**
   * Retrieve a field definition
   *
   * @param path field path, either 'a,b,c' or ['a', 'b', 'c']
   */
  findField(path: string): ParquetField;
  findField(path: string[]): ParquetField;
  findField(path: any): ParquetField {
    const parts = toPathArray(path);

    let n = this.fields;
    for (let i = 0; i < parts.length - 1; i++) {
      n = n[parts[i]].fields;
    }

    return n[parts[parts.length - 1]];
  }

  /**
   * Retrieve a field definition and all the field's ancestors
   *
   * @param path field path, either 'a,b,c' or ['a', 'b', 'c']; an array
   *             argument is left unchanged (it used to be shifted empty)
   * @returns the fields along the path, outermost first
   */
  findFieldBranch(path: string): ParquetField[];
  findFieldBranch(path: string[]): ParquetField[];
  findFieldBranch(path: any): any[] {
    const parts = toPathArray(path);
    const branch = [];
    let n = this.fields;
    for (let i = 0; i < parts.length; i++) {
      branch.push(n[parts[i]]);
      // Only descend while there is another path segment to resolve.
      if (i < parts.length - 1) {
        n = n[parts[i]].fields;
      }
    }
    return branch;
  }

  /** Shred `record` into the column data of `buffer`. */
  shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void {
    shredRecord(this, record, buffer);
  }

  /** Rebuild records from the column data in `buffer`. */
  materializeRecords(buffer: ParquetBuffer): ParquetRecord[] {
    return materializeRecords(this, buffer);
  }

  /**
   * Set the compression method on every leaf field, both in the original
   * definition and in the built field tree.
   *
   * @returns this schema, for chaining
   */
  compress(type: ParquetCompression): this {
    setCompress(this.schema, type);
    setCompress(this.fields, type);
    return this;
  }

  /** Create a buffer for this schema via `shredBuffer`. */
  buffer(): ParquetBuffer {
    return shredBuffer(this);
  }
}

/**
 * Recursively assign `type` as the compression of every leaf node; nodes
 * with `fields` are only traversed.
 */
function setCompress(schema: any, type: ParquetCompression) {
  for (const name in schema) {
    const node = schema[name];
    if (node.fields) {
      setCompress(node.fields, type);
    } else {
      node.compression = type;
    }
  }
}

/**
 * Build the field tree for one level of a schema definition.
 *
 * Optional fields raise the definition level by one. Repeated fields raise
 * the repetition level by one and, when not also optional, the definition
 * level as well. Leaf definitions get `encoding` and `compression` defaults
 * written back onto them.
 *
 * @param schema definitions of the fields at this level
 * @param rLevelParentMax maximum repetition level of the parent
 * @param dLevelParentMax maximum definition level of the parent
 * @param path names of the ancestors of this level
 * @throws Error for an unknown type, encoding or compression method
 */
function buildFields(
  schema: SchemaDefinition,
  rLevelParentMax: number,
  dLevelParentMax: number,
  path: string[]
): Record<string, ParquetField> {
  const fieldList: Record<string, ParquetField> = {};

  for (const name in schema) {
    const opts = schema[name];

    /* field repetition type */
    const required = !opts.optional;
    const repeated = !!opts.repeated;
    let rLevelMax = rLevelParentMax;
    let dLevelMax = dLevelParentMax;

    let repetitionType: RepetitionType = 'REQUIRED';
    if (!required) {
      repetitionType = 'OPTIONAL';
      dLevelMax++;
    }
    if (repeated) {
      repetitionType = 'REPEATED';
      rLevelMax++;
      if (required) dLevelMax++;
    }

    const cpath = path.concat([name]);

    /* nested field */
    if (opts.fields) {
      fieldList[name] = {
        name,
        path: cpath,
        key: cpath.join(),
        repetitionType,
        rLevelMax,
        dLevelMax,
        isNested: true,
        fieldCount: Object.keys(opts.fields).length,
        fields: buildFields(
          opts.fields,
          rLevelMax,
          dLevelMax,
          cpath
        )
      };
      continue;
    }

    const typeDef: any = PARQUET_LOGICAL_TYPES[opts.type];
    if (!typeDef) {
      throw new Error(`invalid parquet type: ${opts.type}`);
    }

    opts.encoding = opts.encoding || 'PLAIN';
    if (!(opts.encoding in PARQUET_CODEC)) {
      throw new Error(`unsupported parquet encoding: ${opts.encoding}`);
    }

    opts.compression = opts.compression || 'UNCOMPRESSED';
    if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {
      throw new Error(`unsupported compression method: ${opts.compression}`);
    }

    /* add to schema */
    fieldList[name] = {
      name,
      primitiveType: typeDef.primitiveType,
      originalType: typeDef.originalType,
      path: cpath,
      key: cpath.join(),
      repetitionType,
      encoding: opts.encoding,
      compression: opts.compression,
      typeLength: opts.typeLength || typeDef.typeLength,
      rLevelMax,
      dLevelMax
    };
  }
  return fieldList;
}

/**
 * Flatten a field tree into a list, each nested field immediately followed
 * by its descendants.
 */
function listFields(fields: Record<string, ParquetField>): ParquetField[] {
  const list: ParquetField[] = [];
  for (const k in fields) {
    list.push(fields[k]);
    if (fields[k].isNested) {
      // Append in place rather than re-copying the whole list with concat.
      for (const child of listFields(fields[k].fields)) {
        list.push(child);
      }
    }
  }
  return list;
}
/**
 * Serialize a thrift struct to bytes using the compact protocol.
 *
 * `obj.write` is handed a compact protocol over a buffered transport; every
 * chunk the transport hands to its callback is collected and the chunks are
 * joined once the transport has been flushed.
 */
export function serializeThrift(obj: any): Buffer {
  const chunks: Buffer[] = [];
  const collect = (chunk: Buffer) => {
    chunks.push(chunk);
  };

  const transport = new TBufferedTransport(null, collect);
  obj.write(new TCompactProtocol(transport));
  transport.flush();

  return Buffer.concat(chunks);
}
/**
 * Number of bits needed to store `val`: zero for 0, otherwise
 * `ceil(log2(val + 1))`.
 */
export function getBitWidth(val: number): number {
  return val === 0 ? 0 : Math.ceil(Math.log2(val + 1));
}
// Index of the first pattern in `arr` that matches the path `elem`, or -1.
// Patterns support MQTT style wildcards:
//   + matches any single segment
//   # matches any single segment and, as the last segment of a pattern,
//     every deeper segment as well
// A pattern longer than the path never matches.
export function fieldIndexOf(arr: string[][], elem: string[]): number {
  return arr.findIndex((pattern) => {
    if (pattern.length > elem.length) {
      return false;
    }
    const openEnded = pattern[pattern.length - 1] === '#';
    return elem.every((segment, pos) => {
      const expected = pattern[pos];
      if (expected === segment || expected === '+' || expected === '#') {
        return true;
      }
      // Past the end of the pattern only a trailing '#' keeps matching.
      return pos >= pattern.length && openEnded;
    });
  });
}
11 | num_values: number; 12 | encoding: Encoding.Encoding; 13 | definition_level_encoding: Encoding.Encoding; 14 | repetition_level_encoding: Encoding.Encoding; 15 | statistics?: Statistics.Statistics; 16 | } 17 | export class DataPageHeader { 18 | public num_values: number; 19 | public encoding: Encoding.Encoding; 20 | public definition_level_encoding: Encoding.Encoding; 21 | public repetition_level_encoding: Encoding.Encoding; 22 | public statistics?: Statistics.Statistics; 23 | constructor(args: IDataPageHeaderArgs) { 24 | if (args != null && args.num_values != null) { 25 | this.num_values = args.num_values; 26 | } 27 | else { 28 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[num_values] is unset!"); 29 | } 30 | if (args != null && args.encoding != null) { 31 | this.encoding = args.encoding; 32 | } 33 | else { 34 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[encoding] is unset!"); 35 | } 36 | if (args != null && args.definition_level_encoding != null) { 37 | this.definition_level_encoding = args.definition_level_encoding; 38 | } 39 | else { 40 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[definition_level_encoding] is unset!"); 41 | } 42 | if (args != null && args.repetition_level_encoding != null) { 43 | this.repetition_level_encoding = args.repetition_level_encoding; 44 | } 45 | else { 46 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[repetition_level_encoding] is unset!"); 47 | } 48 | if (args != null && args.statistics != null) { 49 | this.statistics = args.statistics; 50 | } 51 | } 52 | public write(output: thrift.TProtocol): void { 53 | output.writeStructBegin("DataPageHeader"); 54 | if (this.num_values != null) { 55 | output.writeFieldBegin("num_values", thrift.Thrift.Type.I32, 1); 56 | 
output.writeI32(this.num_values); 57 | output.writeFieldEnd(); 58 | } 59 | if (this.encoding != null) { 60 | output.writeFieldBegin("encoding", thrift.Thrift.Type.I32, 2); 61 | output.writeI32(this.encoding); 62 | output.writeFieldEnd(); 63 | } 64 | if (this.definition_level_encoding != null) { 65 | output.writeFieldBegin("definition_level_encoding", thrift.Thrift.Type.I32, 3); 66 | output.writeI32(this.definition_level_encoding); 67 | output.writeFieldEnd(); 68 | } 69 | if (this.repetition_level_encoding != null) { 70 | output.writeFieldBegin("repetition_level_encoding", thrift.Thrift.Type.I32, 4); 71 | output.writeI32(this.repetition_level_encoding); 72 | output.writeFieldEnd(); 73 | } 74 | if (this.statistics != null) { 75 | output.writeFieldBegin("statistics", thrift.Thrift.Type.STRUCT, 5); 76 | this.statistics.write(output); 77 | output.writeFieldEnd(); 78 | } 79 | output.writeFieldStop(); 80 | output.writeStructEnd(); 81 | return; 82 | } 83 | public static read(input: thrift.TProtocol): DataPageHeader { 84 | input.readStructBegin(); 85 | let _args: any = {}; 86 | while (true) { 87 | const ret: thrift.TField = input.readFieldBegin(); 88 | const fieldType: thrift.Thrift.Type = ret.ftype; 89 | const fieldId: number = ret.fid; 90 | if (fieldType === thrift.Thrift.Type.STOP) { 91 | break; 92 | } 93 | switch (fieldId) { 94 | case 1: 95 | if (fieldType === thrift.Thrift.Type.I32) { 96 | const value_1: number = input.readI32(); 97 | _args.num_values = value_1; 98 | } 99 | else { 100 | input.skip(fieldType); 101 | } 102 | break; 103 | case 2: 104 | if (fieldType === thrift.Thrift.Type.I32) { 105 | const value_2: Encoding.Encoding = input.readI32(); 106 | _args.encoding = value_2; 107 | } 108 | else { 109 | input.skip(fieldType); 110 | } 111 | break; 112 | case 3: 113 | if (fieldType === thrift.Thrift.Type.I32) { 114 | const value_3: Encoding.Encoding = input.readI32(); 115 | _args.definition_level_encoding = value_3; 116 | } 117 | else { 118 | input.skip(fieldType); 
/**
 * End-to-end demo: write the rows from `mkTestRows` to 'fruits.parquet' with
 * SNAPPY compression, then read back the `name` column and everything below
 * `stock.loc`, logging each record.
 *
 * Every asynchronous call is awaited so that a failure rejects the promise
 * returned by this function instead of being lost.
 */
async function example(): Promise<void> {

  const opts: ParquetWriterOptions = {
    useDataPageV2: false,
    // pageSize: 12
  };

  const schema = new ParquetSchema({
    name: { type: 'UTF8' },
    // parquet-mr actually doesnt support this
    // quantity: { type: 'INT64', encoding: 'RLE', typeLength: 6, optional: true, compression: opts.compression },
    quantity: { type: 'INT64', optional: true },
    price: { type: 'DOUBLE' },
    date: { type: 'TIMESTAMP_MICROS' },
    day: { type: 'DATE' },
    finger: { type: 'FIXED_LEN_BYTE_ARRAY', typeLength: 5 },
    inter: { type: 'INTERVAL' },
    // // TODO: Drill compatible
    stock: {
      repeated: true,
      fields: {
        quantity: { type: 'INT64', repeated: true },
        warehouse: { type: 'UTF8' },
        loc: {
          optional: true,
          fields: {
            tags: {
              optional: true,
              fields: {
                val: { type: 'UTF8' },
                xyz: { type: 'INT32' }
              }
            },
            lon: { type: 'FLOAT' },
            lat: { type: 'FLOAT' },
            zags: {
              optional: true,
              fields: {
                zal: { type: 'UTF8' },
                zyx: { type: 'INT32' }
              }
            },
          }
        }
      }
    },
    // colour: { type: 'UTF8', repeated: true, compression: opts.compression },
    // meta_json: { type: 'BSON', optional: true, compression: opts.compression },
    // compression: { type: 'UTF8', optional: true, compression: opts.compression }
  }).compress('SNAPPY');

  console.log(schema);

  const writer = await ParquetWriter.openFile(schema, 'fruits.parquet', opts);
  const rows = mkTestRows(opts);
  for (const row of rows) {
    // NOTE(review): appendRow is presumably asynchronous — confirm against the
    // writer. Awaiting keeps rows in order and surfaces a failed append here;
    // it is harmless if the call turns out to be synchronous.
    await writer.appendRow(row);
  }
  await writer.close();
  console.log('Write: OK');

  const reader = await ParquetReader.openFile('fruits.parquet');
  try {
    console.log(reader.getSchema());
    const cursor = reader.getCursor(['name', ['stock', 'loc', '#']]);
    let record = await cursor.next();
    while (record) {
      console.log(record);
      record = await cursor.next();
    }
  } finally {
    // Release the file even when reading fails part way through.
    await reader.close();
  }
  console.log('Read: OK');
}
'blue'], 95 | compression: opts && opts.compression 96 | }); 97 | 98 | rows.push({ 99 | name: 'oranges', 100 | quantity: 20, 101 | price: 2.7, 102 | day: new Date('2017-11-26'), 103 | date: new Date(TEST_VTIME + 2000 * i), 104 | finger: 'FNORD', 105 | inter: { months: 42, days: 23, milliseconds: 777 }, 106 | stock: { 107 | quantity: [50, 33], 108 | warehouse: 'X' 109 | }, 110 | colour: ['orange'] 111 | }); 112 | 113 | rows.push({ 114 | name: 'kiwi', 115 | price: 4.2, 116 | quantity: undefined, 117 | day: new Date('2017-11-26'), 118 | date: new Date(TEST_VTIME + 8000 * i), 119 | finger: 'FNORD', 120 | inter: { months: 42, days: 23, milliseconds: 777 }, 121 | stock: [ 122 | { quantity: 42, warehouse: 'f' }, 123 | { quantity: 20, warehouse: 'x' } 124 | ], 125 | colour: ['green', 'brown'], 126 | meta_json: { expected_ship_date: new Date(TEST_VTIME) } 127 | }); 128 | 129 | rows.push({ 130 | name: 'banana', 131 | price: 3.2, 132 | day: new Date('2017-11-26'), 133 | date: new Date(TEST_VTIME + 6000 * i), 134 | finger: 'FNORD', 135 | inter: { months: 42, days: 23, milliseconds: 777 }, 136 | colour: ['yellow'], 137 | meta_json: { shape: 'curved' } 138 | }); 139 | } 140 | 141 | return rows; 142 | } 143 | 144 | export function mkTestRowsNoRepeat(opts?: any) { 145 | const rows: any[] = []; 146 | 147 | for (let i = 0; i < TEST_NUM_ROWS; i++) { 148 | rows.push({ 149 | name: 'apples', 150 | quantity: 10, 151 | price: 2.6, 152 | day: new Date('2017-11-26'), 153 | date: new Date(TEST_VTIME + 1000 * i), 154 | finger: 'FNORD', 155 | inter: { months: 42, days: 23, milliseconds: 777 }, 156 | stock: { quantity: 10, warehouse: 'A' }, 157 | colour: ['green', 'red'], 158 | compression: opts && opts.compression 159 | }); 160 | 161 | rows.push({ 162 | name: 'oranges', 163 | quantity: 20, 164 | price: 2.7, 165 | day: new Date('2017-11-26'), 166 | date: new Date(TEST_VTIME + 2000 * i), 167 | finger: 'FNORD', 168 | inter: { months: 42, days: 23, milliseconds: 777 }, 169 | stock: { 170 | 
/**
 * Build the demo fixture without repeated `stock` entries: every `stock`
 * value is a single object (or absent) and its `quantity` is a scalar.
 *
 * Each of the TEST_NUM_ROWS rounds contributes four rows, in the order
 * apples, oranges, kiwi, banana, with `date` offset from TEST_VTIME by a
 * per-fruit multiple of the round index.
 *
 * @param opts optional options bag; only `opts.compression` is read and it is
 *             copied verbatim onto the "apples" rows.
 * @returns the generated rows.
 */
export function mkTestRowsNoRepeat(opts?: any) {
  const out: any[] = [];
  const fixedDay = '2017-11-26';
  const newInterval = () => ({ months: 42, days: 23, milliseconds: 777 });

  for (let n = 0; n < TEST_NUM_ROWS; n += 1) {
    out.push(
      {
        name: 'apples',
        quantity: 10,
        price: 2.6,
        day: new Date(fixedDay),
        date: new Date(TEST_VTIME + 1000 * n),
        finger: 'FNORD',
        inter: newInterval(),
        stock: { quantity: 10, warehouse: 'A' },
        colour: ['green', 'red'],
        compression: opts && opts.compression
      },
      {
        name: 'oranges',
        quantity: 20,
        price: 2.7,
        day: new Date(fixedDay),
        date: new Date(TEST_VTIME + 2000 * n),
        finger: 'FNORD',
        inter: newInterval(),
        stock: {
          quantity: 50,
          warehouse: 'X'
        },
        colour: ['orange']
      },
      {
        // quantity is present as a key but explicitly undefined
        name: 'kiwi',
        price: 4.2,
        quantity: undefined,
        day: new Date(fixedDay),
        date: new Date(TEST_VTIME + 8000 * n),
        finger: 'FNORD',
        inter: newInterval(),
        stock: { quantity: 20, warehouse: 'x' },
        colour: ['green', 'brown'],
        meta_json: { expected_ship_date: new Date(TEST_VTIME) }
      },
      {
        // no quantity and no stock at all
        name: 'banana',
        price: 3.2,
        day: new Date(fixedDay),
        date: new Date(TEST_VTIME + 6000 * n),
        finger: 'FNORD',
        inter: newInterval(),
        colour: ['yellow'],
        meta_json: { shape: 'curved' }
      }
    );
  }

  return out;
}
*/ 22 | "sourceMap": true, 23 | // "outFile": "./", /* Concatenate and emit output to single file. */ 24 | // "outDir": "./", /* Redirect output structure to the directory. */ 25 | "outDir": "build", 26 | // "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */ 27 | // "removeComments": true, /* Do not emit comments to output. */ 28 | // "noEmit": true, /* Do not emit outputs. */ 29 | // "importHelpers": true, /* Import emit helpers from 'tslib'. */ 30 | // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ 31 | // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ 32 | /* Strict Type-Checking Options */ 33 | // "strict": true, /* Enable all strict type-checking options. */ 34 | "strict": false, 35 | // "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */ 36 | "noImplicitAny": true, 37 | // "strictNullChecks": true, /* Enable strict null checks. */ 38 | // "strictFunctionTypes": true, /* Enable strict checking of function types. */ 39 | // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ 40 | // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ 41 | "noImplicitThis": true, 42 | // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ 43 | "alwaysStrict": true, 44 | /* Additional Checks */ 45 | // "noUnusedLocals": true, /* Report errors on unused locals. */ 46 | "noUnusedLocals": true, 47 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 48 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. 
*/ 49 | "noImplicitReturns": true, 50 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 51 | "noFallthroughCasesInSwitch": true, 52 | /* Module Resolution Options */ 53 | // "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ 54 | "moduleResolution": "node", 55 | // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ 56 | // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ 57 | // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ 58 | // "typeRoots": [], /* List of folders to include type definitions from. */ 59 | // "types": [], /* Type declaration files to be included in compilation. */ 60 | // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ 61 | // "allowSyntheticDefaultImports": true, 62 | // "esModuleInterop": true, /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ 63 | // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ 64 | /* Source Map Options */ 65 | // "sourceRoot": "./", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ 66 | // "mapRoot": "./", /* Specify the location where debugger should locate map files instead of generated locations. */ 67 | // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */ 68 | // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. 
*/ 69 | /* Experimental Options */ 70 | // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ 71 | "experimentalDecorators": true, 72 | // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ 73 | "emitDecoratorMetadata": true, 74 | /* Misc */ 75 | "forceConsistentCasingInFileNames": true, 76 | "stripInternal": true, 77 | "pretty": true, 78 | "resolveJsonModule": true, 79 | // Avoid including the dom lib 80 | "skipLibCheck": true 81 | }, 82 | "include": [ 83 | "src/**/*", 84 | "dev/**/*", 85 | "test/**/*" 86 | ], 87 | "exclude": [ 88 | "node_modules", 89 | "build", 90 | "lib" 91 | ] 92 | } -------------------------------------------------------------------------------- /src/thrift/RowGroup.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import Int64 = require("node-int64"); 8 | import * as thrift from "thrift"; 9 | import * as ColumnChunk from "./ColumnChunk"; 10 | import * as SortingColumn from "./SortingColumn"; 11 | export interface IRowGroupArgs { 12 | columns: Array; 13 | total_byte_size: number | Int64; 14 | num_rows: number | Int64; 15 | sorting_columns?: Array; 16 | } 17 | export class RowGroup { 18 | public columns: Array; 19 | public total_byte_size: Int64; 20 | public num_rows: Int64; 21 | public sorting_columns?: Array; 22 | constructor(args: IRowGroupArgs) { 23 | if (args != null && args.columns != null) { 24 | this.columns = args.columns; 25 | } 26 | else { 27 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[columns] is unset!"); 28 | } 29 | if (args != null && args.total_byte_size != null) { 30 | if (typeof args.total_byte_size === "number") { 31 | this.total_byte_size =
/**
 * Thrift struct `RowGroup` (autogenerated shape, kept as generated).
 *
 * Fields 1-3 (`columns`, `total_byte_size`, `num_rows`) are required: the
 * constructor throws a TProtocolException when any of them is missing.
 * Field 4 (`sorting_columns`) is optional.
 *
 * The element types of the two list fields are spelled out explicitly
 * (`ColumnChunk.ColumnChunk` / `SortingColumn.SortingColumn`); they match the
 * element types already used by `write` and `read` below.
 */
export class RowGroup {
    public columns: Array<ColumnChunk.ColumnChunk>;
    public total_byte_size: Int64;
    public num_rows: Int64;
    public sorting_columns?: Array<SortingColumn.SortingColumn>;
    /**
     * @param args field values; plain numbers given for the two i64 fields
     *             are wrapped in Int64.
     * @throws thrift.Thrift.TProtocolException when a required field is unset.
     */
    constructor(args: IRowGroupArgs) {
        if (args != null && args.columns != null) {
            this.columns = args.columns;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[columns] is unset!");
        }
        if (args != null && args.total_byte_size != null) {
            if (typeof args.total_byte_size === "number") {
                this.total_byte_size = new Int64(args.total_byte_size);
            }
            else {
                this.total_byte_size = args.total_byte_size;
            }
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[total_byte_size] is unset!");
        }
        if (args != null && args.num_rows != null) {
            if (typeof args.num_rows === "number") {
                this.num_rows = new Int64(args.num_rows);
            }
            else {
                this.num_rows = args.num_rows;
            }
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[num_rows] is unset!");
        }
        if (args != null && args.sorting_columns != null) {
            this.sorting_columns = args.sorting_columns;
        }
    }
    /**
     * Serialize this struct to `output`; fields that are null/undefined are
     * not written.
     */
    public write(output: thrift.TProtocol): void {
        output.writeStructBegin("RowGroup");
        if (this.columns != null) {
            output.writeFieldBegin("columns", thrift.Thrift.Type.LIST, 1);
            output.writeListBegin(thrift.Thrift.Type.STRUCT, this.columns.length);
            this.columns.forEach((value_1: ColumnChunk.ColumnChunk): void => {
                value_1.write(output);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        if (this.total_byte_size != null) {
            output.writeFieldBegin("total_byte_size", thrift.Thrift.Type.I64, 2);
            output.writeI64(this.total_byte_size);
            output.writeFieldEnd();
        }
        if (this.num_rows != null) {
            output.writeFieldBegin("num_rows", thrift.Thrift.Type.I64, 3);
            output.writeI64(this.num_rows);
            output.writeFieldEnd();
        }
        if (this.sorting_columns != null) {
            output.writeFieldBegin("sorting_columns", thrift.Thrift.Type.LIST, 4);
            output.writeListBegin(thrift.Thrift.Type.STRUCT, this.sorting_columns.length);
            this.sorting_columns.forEach((value_2: SortingColumn.SortingColumn): void => {
                value_2.write(output);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        output.writeFieldStop();
        output.writeStructEnd();
        return;
    }
    /**
     * Deserialize a RowGroup from `input`. Fields with an unknown id or an
     * unexpected wire type are skipped.
     *
     * @throws thrift.Thrift.TProtocolException when any required field was
     *         not present in the input.
     */
    public static read(input: thrift.TProtocol): RowGroup {
        input.readStructBegin();
        let _args: any = {};
        while (true) {
            const ret: thrift.TField = input.readFieldBegin();
            const fieldType: thrift.Thrift.Type = ret.ftype;
            const fieldId: number = ret.fid;
            if (fieldType === thrift.Thrift.Type.STOP) {
                break;
            }
            switch (fieldId) {
                case 1:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        const value_3: Array<ColumnChunk.ColumnChunk> = new Array<ColumnChunk.ColumnChunk>();
                        const metadata_1: thrift.TList = input.readListBegin();
                        const size_1: number = metadata_1.size;
                        for (let i_1: number = 0; i_1 < size_1; i_1++) {
                            const value_4: ColumnChunk.ColumnChunk = ColumnChunk.ColumnChunk.read(input);
                            value_3.push(value_4);
                        }
                        input.readListEnd();
                        _args.columns = value_3;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 2:
                    if (fieldType === thrift.Thrift.Type.I64) {
                        const value_5: Int64 = input.readI64();
                        _args.total_byte_size = value_5;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 3:
                    if (fieldType === thrift.Thrift.Type.I64) {
                        const value_6: Int64 = input.readI64();
                        _args.num_rows = value_6;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 4:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        const value_7: Array<SortingColumn.SortingColumn> = new Array<SortingColumn.SortingColumn>();
                        const metadata_2: thrift.TList = input.readListBegin();
                        const size_2: number = metadata_2.size;
                        for (let i_2: number = 0; i_2 < size_2; i_2++) {
                            const value_8: SortingColumn.SortingColumn = SortingColumn.SortingColumn.read(input);
                            value_7.push(value_8);
                        }
                        input.readListEnd();
                        _args.sorting_columns = value_7;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                default: {
                    input.skip(fieldType);
                }
            }
            input.readFieldEnd();
        }
        input.readStructEnd();
        if (_args.columns !== undefined && _args.total_byte_size !== undefined && _args.num_rows !== undefined) {
            return new RowGroup(_args);
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read RowGroup from input");
        }
    }
}
/**
 * Create an empty shred buffer for `schema`.
 *
 * One column entry is created per item of `schema.fieldList`, keyed by the
 * field's `key`; each entry starts with its own empty `dlevels`, `rlevels`
 * and `values` arrays and a `count` of 0.
 *
 * @param schema schema whose `fieldList` determines the columns.
 * @returns a buffer with `rowCount` 0 and one empty column per field.
 */
export function shredBuffer(schema: ParquetSchema): ParquetBuffer {
  const columnData: Record<string, ParquetData> = {};
  for (const field of schema.fieldList) {
    columnData[field.key] = {
      dlevels: [],
      rlevels: [],
      values: [],
      count: 0
    };
  }
  return { rowCount: 0, columnData };
}
/**
 * 'Shred' a record into per-column lists of
 * (value, repetition level, definition level) entries using the Google
 * Dremel algorithm.
 *
 * The buffer argument must point to an object into which the shredded record
 * will be returned. You may re-use the buffer for repeated calls to this
 * function to append to an existing buffer, as long as the schema is
 * unchanged.
 *
 * The format in which the shredded records will be stored in the buffer is as
 * follows:
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *          dlevels: [d1, d2, .. dN],
 *          rlevels: [r1, r2, .. rN],
 *          values: [v1, v2, .. vN],
 *          count: N,
 *        }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 * The record is first shredded into a scratch buffer, so `buffer` is left
 * untouched when shredding throws.
 *
 * @param schema schema describing the record.
 * @param record the record to shred.
 * @param buffer target buffer; an object lacking `columnData` or `rowCount`
 *               is initialised from this record.
 */
export function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {
  /* shred the record, this may raise an exception */
  const data = shredBuffer(schema).columnData;

  shredRecordFields(schema.fields, record, data, 0, 0);

  /* if no error during shredding, add the shredded record to the buffer */
  if (!('columnData' in buffer) || !('rowCount' in buffer)) {
    buffer.rowCount = 1;
    buffer.columnData = data;
    return;
  }
  buffer.rowCount += 1;
  for (const field of schema.fieldList) {
    const target = buffer.columnData[field.key];
    const source = data[field.key];
    // Append element by element: push.apply(target, source) passes every
    // element as a call argument and fails on very large columns.
    for (const rlevel of source.rlevels) target.rlevels.push(rlevel);
    for (const dlevel of source.dlevels) target.dlevels.push(dlevel);
    for (const value of source.values) target.values.push(value);
    target.count += source.count;
  }
}
/**
 * Recursively shred `record` against `fields`, appending one entry per leaf
 * occurrence to the matching column in `data`.
 *
 * For every field the values are collected first: an array value is used as
 * is, any other non-null value becomes a single-element list, and a
 * missing/null/undefined value yields an empty list. An empty list records a
 * null entry (levels only, no value) on the field's column, or on all leaf
 * columns below it when the field is nested.
 *
 * @param fields fields of the current schema level, keyed by name.
 * @param record the (sub-)record to read values from; may be null when a
 *               parent field had no value.
 * @param data   per-column output, keyed by `field.key`; mutated in place.
 * @param rLevel repetition level to record for the first value of each field.
 * @param dLevel definition level to record for null entries.
 * @throws Error when a REQUIRED field has no value in a non-null record, or
 *         when a field that is not REPEATED holds more than one value.
 */
function shredRecordFields(
  fields: Record<string, ParquetField>,
  record: any,
  data: Record<string, ParquetData>,
  rLevel: number,
  dLevel: number
) {
  for (const name in fields) {
    const field = fields[name];

    // fetch values
    let values: any[] = [];
    if (record && (field.name in record) && record[field.name] !== undefined && record[field.name] !== null) {
      // Array.isArray also recognises Array subclasses and arrays created in
      // another realm, which a `constructor === Array` comparison misses.
      if (Array.isArray(record[field.name])) {
        values = record[field.name];
      } else {
        values.push(record[field.name]);
      }
    }
    // check values
    if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') {
      throw new Error(`missing required field: ${field.name}`);
    }
    if (values.length > 1 && field.repetitionType !== 'REPEATED') {
      throw new Error(`too many values for field: ${field.name}`);
    }

    // push null
    if (values.length === 0) {
      if (field.isNested) {
        // propagate the null down to every leaf column under this field
        shredRecordFields(
          field.fields,
          null,
          data,
          rLevel,
          dLevel);
      } else {
        data[field.key].count += 1;
        data[field.key].rlevels.push(rLevel);
        data[field.key].dlevels.push(dLevel);
      }
      continue;
    }

    // push values
    for (let i = 0; i < values.length; i++) {
      // only the first value keeps the caller's repetition level
      const rlvl = i === 0 ? rLevel : field.rLevelMax;
      if (field.isNested) {
        shredRecordFields(
          field.fields,
          values[i],
          data,
          rlvl,
          field.dLevelMax);
      } else {
        data[field.key].count += 1;
        data[field.key].rlevels.push(rlvl);
        data[field.key].dlevels.push(field.dLevelMax);
        data[field.key].values.push(Types.toPrimitive(
          field.originalType || field.primitiveType,
          values[i]
        ));
      }
    }
  }
}
/**
 * 'Materialize' per-column lists of (value, repetition level, definition
 * level) entries back into nested records (objects/arrays) using the Google
 * Dremel algorithm.
 *
 * `buffer` must have the layout produced by shredRecord:
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *          dlevels: [d1, d2, .. dN],
 *          rlevels: [r1, r2, .. rN],
 *          values: [v1, v2, .. vN],
 *        }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 * @param schema schema the buffer was shredded with.
 * @param buffer shredded column data.
 * @returns `buffer.rowCount` records, filled in column by column.
 */
export function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {
  // start from one empty record per row ...
  const out: ParquetRecord[] = [];
  let remaining = buffer.rowCount;
  while (remaining > 0) {
    out.push({});
    remaining -= 1;
  }

  // ... then let every column write its values into them
  for (const columnKey in buffer.columnData) {
    materializeColumn(schema, buffer, columnKey, out);
  }
  return out;
}
/**
 * Write the entries of one shredded column back into `records`.
 *
 * Does nothing when the column's `count` is falsy. Otherwise each of the
 * `count` entries is placed by walking the field's branch (as returned by
 * `schema.findFieldBranch(key)`) from the top-level record down to the leaf,
 * creating intermediate objects/arrays on the way.
 *
 * @param schema  schema used to look up the field and its branch.
 * @param buffer  shredded data; only `buffer.columnData[key]` is read.
 * @param key     column key of the field to materialize.
 * @param records target records, mutated in place.
 */
function materializeColumn(schema: ParquetSchema, buffer: ParquetBuffer, key: string, records: ParquetRecord[]) {
  const data = buffer.columnData[key];
  if (!data.count) return;

  const field = schema.findField(key);
  const branch = schema.findFieldBranch(key);

  // One counter per repetition level (0..rLevelMax), all starting at 0.
  // tslint:disable-next-line:prefer-array-literal
  const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
  // Index of the next unread entry in data.values; it only advances for
  // entries that carry a value (see "Leaf node" below).
  let vIndex = 0;
  for (let i = 0; i < data.count; i++) {
    const dLevel = data.dlevels[i];
    const rLevel = data.rlevels[i];
    // Advance the counter at this entry's repetition level and reset all
    // deeper counters.
    rLevels[rLevel]++;
    rLevels.fill(0, rLevel + 1);

    // The level-0 counter is 1-based, hence the "- 1" when it is used as the
    // index of the target record.
    let rIndex = 0;
    let record = records[rLevels[rIndex++] - 1];

    // Internal nodes
    for (const step of branch) {
      // stop at the leaf itself, or where the entry is no longer defined
      if (step === field) break;
      if (dLevel < step.dLevelMax) break;
      if (step.repetitionType === 'REPEATED') {
        if (!(step.name in record)) record[step.name] = [];
        const ix = rLevels[rIndex++];
        // grow the array with empty objects until index ix exists
        while (record[step.name].length <= ix) record[step.name].push({});
        record = record[step.name][ix];
      } else {
        record[step.name] = record[step.name] || {};
        record = record[step.name];
      }
    }

    // Leaf node
    // Only entries at the field's maximum definition level consume a value.
    if (dLevel === field.dLevelMax) {
      const value = Types.fromPrimitive(
        field.originalType || field.primitiveType,
        data.values[vIndex]
      );
      vIndex++;
      if (field.repetitionType === 'REPEATED') {
        if (!(field.name in record)) record[field.name] = [];
        const ix = rLevels[rIndex];
        // pad with nulls so the value can be stored at index ix
        while (record[field.name].length <= ix) record[field.name].push(null);
        record[field.name][ix] = value;
      } else {
        record[field.name] = value;
      }
    }
  }
}
/**
 * Thrift struct `ColumnChunk` (autogenerated; code left exactly as generated).
 *
 * Only `file_offset` (field 2) is required; every other field is optional and
 * is neither stored by the constructor nor written by `write` when it is
 * null/undefined.
 */
export class ColumnChunk {
    public file_path?: string;
    public file_offset: Int64;
    public meta_data?: ColumnMetaData.ColumnMetaData;
    public offset_index_offset?: Int64;
    public offset_index_length?: number;
    public column_index_offset?: Int64;
    public column_index_length?: number;
    /**
     * @param args field values; plain numbers given for the i64 fields
     *             (`file_offset`, `offset_index_offset`, `column_index_offset`)
     *             are wrapped in Int64.
     * @throws thrift.Thrift.TProtocolException when `file_offset` is unset.
     */
    constructor(args: IColumnChunkArgs) {
        if (args != null && args.file_path != null) {
            this.file_path = args.file_path;
        }
        if (args != null && args.file_offset != null) {
            if (typeof args.file_offset === "number") {
                this.file_offset = new Int64(args.file_offset);
            }
            else {
                this.file_offset = args.file_offset;
            }
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[file_offset] is unset!");
        }
        if (args != null && args.meta_data != null) {
            this.meta_data = args.meta_data;
        }
        if (args != null && args.offset_index_offset != null) {
            if (typeof args.offset_index_offset === "number") {
                this.offset_index_offset = new Int64(args.offset_index_offset);
            }
            else {
                this.offset_index_offset = args.offset_index_offset;
            }
        }
        if (args != null && args.offset_index_length != null) {
            this.offset_index_length = args.offset_index_length;
        }
        if (args != null && args.column_index_offset != null) {
            if (typeof args.column_index_offset === "number") {
                this.column_index_offset = new Int64(args.column_index_offset);
            }
            else {
                this.column_index_offset = args.column_index_offset;
            }
        }
        if (args != null && args.column_index_length != null) {
            this.column_index_length = args.column_index_length;
        }
    }
    /**
     * Serialize this struct to `output`, writing each non-null field with its
     * thrift field id (1-7) followed by the field-stop marker.
     */
    public write(output: thrift.TProtocol): void {
        output.writeStructBegin("ColumnChunk");
        if (this.file_path != null) {
            output.writeFieldBegin("file_path", thrift.Thrift.Type.STRING, 1);
            output.writeString(this.file_path);
            output.writeFieldEnd();
        }
        if (this.file_offset != null) {
            output.writeFieldBegin("file_offset", thrift.Thrift.Type.I64, 2);
            output.writeI64(this.file_offset);
            output.writeFieldEnd();
        }
        if (this.meta_data != null) {
            output.writeFieldBegin("meta_data", thrift.Thrift.Type.STRUCT, 3);
            this.meta_data.write(output);
            output.writeFieldEnd();
        }
        if (this.offset_index_offset != null) {
            output.writeFieldBegin("offset_index_offset", thrift.Thrift.Type.I64, 4);
            output.writeI64(this.offset_index_offset);
            output.writeFieldEnd();
        }
        if (this.offset_index_length != null) {
            output.writeFieldBegin("offset_index_length", thrift.Thrift.Type.I32, 5);
            output.writeI32(this.offset_index_length);
            output.writeFieldEnd();
        }
        if (this.column_index_offset != null) {
            output.writeFieldBegin("column_index_offset", thrift.Thrift.Type.I64, 6);
            output.writeI64(this.column_index_offset);
            output.writeFieldEnd();
        }
        if (this.column_index_length != null) {
            output.writeFieldBegin("column_index_length", thrift.Thrift.Type.I32, 7);
            output.writeI32(this.column_index_length);
            output.writeFieldEnd();
        }
        output.writeFieldStop();
        output.writeStructEnd();
        return;
    }
    /**
     * Deserialize a ColumnChunk from `input`. Fields with an unknown id or an
     * unexpected wire type are skipped.
     *
     * @throws thrift.Thrift.TProtocolException when `file_offset` was not
     *         present in the input.
     */
    public static read(input: thrift.TProtocol): ColumnChunk {
        input.readStructBegin();
        let _args: any = {};
        while (true) {
            const ret: thrift.TField = input.readFieldBegin();
            const fieldType: thrift.Thrift.Type = ret.ftype;
            const fieldId: number = ret.fid;
            // a STOP field marks the end of the struct
            if (fieldType === thrift.Thrift.Type.STOP) {
                break;
            }
            switch (fieldId) {
                case 1:
                    if (fieldType === thrift.Thrift.Type.STRING) {
                        const value_1: string = input.readString();
                        _args.file_path = value_1;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 2:
                    if (fieldType === thrift.Thrift.Type.I64) {
                        const value_2: Int64 = input.readI64();
                        _args.file_offset = value_2;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 3:
                    if (fieldType === thrift.Thrift.Type.STRUCT) {
                        const value_3: ColumnMetaData.ColumnMetaData = ColumnMetaData.ColumnMetaData.read(input);
                        _args.meta_data = value_3;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 4:
                    if (fieldType === thrift.Thrift.Type.I64) {
                        const value_4: Int64 = input.readI64();
                        _args.offset_index_offset = value_4;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 5:
                    if (fieldType === thrift.Thrift.Type.I32) {
                        const value_5: number = input.readI32();
                        _args.offset_index_length = value_5;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 6:
                    if (fieldType === thrift.Thrift.Type.I64) {
                        const value_6: Int64 = input.readI64();
                        _args.column_index_offset = value_6;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 7:
                    if (fieldType === thrift.Thrift.Type.I32) {
                        const value_7: number = input.readI32();
                        _args.column_index_length = value_7;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                default: {
                    input.skip(fieldType);
                }
            }
            input.readFieldEnd();
        }
        input.readStructEnd();
        if (_args.file_offset !== undefined) {
            return new ColumnChunk(_args);
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read ColumnChunk from input");
        }
    }
}
/**
 * PLAIN-encode `values` of the given primitive type by delegating to the
 * matching type-specific encoder.
 *
 * @param type   primitive type name selecting the encoder.
 * @param values values to encode.
 * @param opts   codec options; only forwarded for FIXED_LEN_BYTE_ARRAY.
 * @returns the encoded bytes.
 * @throws Error `unsupported type: <type>` for any other type name.
 */
export function encodeValues(type: PrimitiveType, values: any[], opts?: ParquetCodecOptions): Buffer {
  if (type === 'BOOLEAN') return encodeValues_BOOLEAN(values);
  if (type === 'INT32') return encodeValues_INT32(values);
  if (type === 'INT64') return encodeValues_INT64(values);
  if (type === 'INT96') return encodeValues_INT96(values);
  if (type === 'FLOAT') return encodeValues_FLOAT(values);
  if (type === 'DOUBLE') return encodeValues_DOUBLE(values);
  if (type === 'BYTE_ARRAY') return encodeValues_BYTE_ARRAY(values);
  if (type === 'FIXED_LEN_BYTE_ARRAY') return encodeValues_FIXED_LEN_BYTE_ARRAY(values, opts);
  throw new Error(`unsupported type: ${type}`);
}

/**
 * Decode `count` PLAIN-encoded values of the given primitive type from
 * `cursor` by delegating to the matching type-specific decoder.
 *
 * @param type   primitive type name selecting the decoder.
 * @param cursor buffer plus read offset, handed to the decoder.
 * @param count  number of values to decode.
 * @param opts   codec options; only forwarded for FIXED_LEN_BYTE_ARRAY.
 * @returns the decoded values.
 * @throws Error `unsupported type: <type>` for any other type name.
 */
export function decodeValues(type: PrimitiveType, cursor: CursorBuffer, count: number, opts: ParquetCodecOptions): any[] {
  if (type === 'BOOLEAN') return decodeValues_BOOLEAN(cursor, count);
  if (type === 'INT32') return decodeValues_INT32(cursor, count);
  if (type === 'INT64') return decodeValues_INT64(cursor, count);
  if (type === 'INT96') return decodeValues_INT96(cursor, count);
  if (type === 'FLOAT') return decodeValues_FLOAT(cursor, count);
  if (type === 'DOUBLE') return decodeValues_DOUBLE(cursor, count);
  if (type === 'BYTE_ARRAY') return decodeValues_BYTE_ARRAY(cursor, count);
  if (type === 'FIXED_LEN_BYTE_ARRAY') return decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts);
  throw new Error(`unsupported type: ${type}`);
}
/**
 * Pack booleans into bytes, eight per byte, least-significant bit first.
 * Unused bits of the last byte stay 0 (Buffer.alloc returns zeroed memory).
 */
function encodeValues_BOOLEAN(values: boolean[]): Buffer {
  const packed = Buffer.alloc(Math.ceil(values.length / 8));
  let position = 0;
  for (const flag of values) {
    if (flag) {
      packed[Math.floor(position / 8)] |= (1 << (position % 8));
    }
    position += 1;
  }
  return packed;
}

/**
 * Unpack `count` booleans (least-significant bit first) starting at
 * `cursor.offset`, then advance the cursor by ceil(count / 8) bytes.
 */
function decodeValues_BOOLEAN(cursor: CursorBuffer, count: number): boolean[] {
  const flags: boolean[] = [];
  const start = cursor.offset;
  for (let bit = 0; bit < count; bit += 1) {
    const byte = cursor.buffer[start + Math.floor(bit / 8)];
    const mask = 1 << (bit % 8);
    flags.push((byte & mask) > 0);
  }
  cursor.offset = start + Math.ceil(count / 8);
  return flags;
}

/** Encode each value as a 4-byte little-endian signed integer. */
function encodeValues_INT32(values: number[]): Buffer {
  const out = Buffer.alloc(4 * values.length);
  let pos = 0;
  for (const value of values) {
    out.writeInt32LE(value, pos);
    pos += 4;
  }
  return out;
}

/**
 * Read `count` 4-byte little-endian signed integers from the cursor,
 * advancing `cursor.offset` by 4 per value.
 */
function decodeValues_INT32(cursor: CursorBuffer, count: number): number[] {
  const decoded: number[] = [];
  for (let remaining = count; remaining > 0; remaining -= 1) {
    const value = cursor.buffer.readInt32LE(cursor.offset);
    decoded.push(value);
    cursor.offset += 4;
  }
  return decoded;
}
/** Encode each value as an 8-byte little-endian integer via INT53. */
function encodeValues_INT64(values: number[]): Buffer {
  const buf = Buffer.alloc(8 * values.length);
  for (let i = 0; i < values.length; i++) {
    INT53.writeInt64LE(values[i], buf, i * 8);
  }
  return buf;
}

/**
 * Read `count` 8-byte little-endian integers via INT53, advancing
 * `cursor.offset` by 8 per value.
 */
function decodeValues_INT64(cursor: CursorBuffer, count: number): number[] {
  const values: number[] = [];
  for (let i = 0; i < count; i++) {
    values.push(INT53.readInt64LE(cursor.buffer, cursor.offset));
    cursor.offset += 8;
  }
  return values;
}

/**
 * Encode each value as a 12-byte little-endian integer.
 *
 * Only 64 bits of precision are stored: the low 8 bytes hold the value as
 * written by INT53.writeInt64LE and the high 4 bytes are pure sign extension
 * (0x00000000 for non-negative values, 0xffffffff otherwise).
 *
 * The value is passed to INT53 unchanged. The earlier `(~-v) + 1` form went
 * through the 32-bit bitwise NOT operator and therefore corrupted negative
 * values outside the int32 range.
 */
function encodeValues_INT96(values: number[]): Buffer {
  const buf = Buffer.alloc(12 * values.length);
  for (let i = 0; i < values.length; i++) {
    INT53.writeInt64LE(values[i], buf, i * 12);
    // truncate to 64 actual precision: upper word only carries the sign
    buf.writeUInt32LE(values[i] >= 0 ? 0 : 0xffffffff, i * 12 + 8);
  }
  return buf;
}

/**
 * Read `count` 12-byte little-endian integers, advancing `cursor.offset` by
 * 12 per value.
 *
 * Only the low 8 bytes are interpreted (as a signed integer read by
 * INT53.readInt64LE); the high 4 bytes are treated as sign extension and
 * skipped, so values needing more than 64 bits are truncated.
 */
function decodeValues_INT96(cursor: CursorBuffer, count: number): number[] {
  const values: number[] = [];
  for (let i = 0; i < count; i++) {
    // truncate to 64 actual precision
    values.push(INT53.readInt64LE(cursor.buffer, cursor.offset));
    cursor.offset += 12;
  }
  return values;
}

/** Encode each value as a 4-byte little-endian IEEE-754 float. */
function encodeValues_FLOAT(values: number[]): Buffer {
  const buf = Buffer.alloc(4 * values.length);
  for (let i = 0; i < values.length; i++) {
    buf.writeFloatLE(values[i], i * 4);
  }
  return buf;
}

/**
 * Read `count` 4-byte little-endian floats, advancing `cursor.offset` by 4
 * per value.
 */
function decodeValues_FLOAT(cursor: CursorBuffer, count: number): number[] {
  const values: number[] = [];
  for (let i = 0; i < count; i++) {
    values.push(cursor.buffer.readFloatLE(cursor.offset));
    cursor.offset += 4;
  }
  return values;
}
/** Encode each value as an 8-byte little-endian IEEE-754 double. */
function encodeValues_DOUBLE(values: number[]): Buffer {
  const buf = Buffer.alloc(8 * values.length);
  for (let i = 0; i < values.length; i++) {
    buf.writeDoubleLE(values[i], i * 8);
  }
  return buf;
}

/**
 * Read `count` 8-byte little-endian doubles, advancing `cursor.offset` by 8
 * per value.
 */
function decodeValues_DOUBLE(cursor: CursorBuffer, count: number): number[] {
  const values: number[] = [];
  for (let i = 0; i < count; i++) {
    values.push(cursor.buffer.readDoubleLE(cursor.offset));
    cursor.offset += 8;
  }
  return values;
}

/**
 * Encode each value as a 4-byte little-endian length prefix followed by the
 * value's bytes.
 *
 * Every value is converted with Buffer.from into a local copy; the caller's
 * `values` array is left untouched (it used to have its elements replaced by
 * the converted Buffers).
 */
function encodeValues_BYTE_ARRAY(values: Buffer[]): Buffer {
  const chunks: Buffer[] = [];
  for (const value of values) {
    const bytes = Buffer.from(value);
    const header = Buffer.alloc(4);
    header.writeUInt32LE(bytes.length, 0);
    chunks.push(header, bytes);
  }
  return Buffer.concat(chunks);
}

/**
 * Read `count` length-prefixed byte arrays, advancing `cursor.offset` past
 * each 4-byte prefix and payload. The returned Buffers are slices of
 * `cursor.buffer`.
 */
function decodeValues_BYTE_ARRAY(cursor: CursorBuffer, count: number): Buffer[] {
  const values: Buffer[] = [];
  for (let i = 0; i < count; i++) {
    const len = cursor.buffer.readUInt32LE(cursor.offset);
    cursor.offset += 4;
    values.push(cursor.buffer.slice(cursor.offset, cursor.offset + len));
    cursor.offset += len;
  }
  return values;
}
| return Buffer.concat(values); 210 | } 211 | 212 | function decodeValues_FIXED_LEN_BYTE_ARRAY(cursor: CursorBuffer, count: number, opts: ParquetCodecOptions): Buffer[] { 213 | const values: Buffer[] = []; 214 | if (!opts.typeLength) { 215 | throw new Error('missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)'); 216 | } 217 | for (let i = 0; i < count; i++) { 218 | values.push(cursor.buffer.slice(cursor.offset, cursor.offset + opts.typeLength)); 219 | cursor.offset += opts.typeLength; 220 | } 221 | return values; 222 | } 223 | -------------------------------------------------------------------------------- /src/thrift/ColumnIndex.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import Int64 = require("node-int64"); 8 | import * as thrift from "thrift"; 9 | import * as BoundaryOrder from "./BoundaryOrder"; 10 | export interface IColumnIndexArgs { 11 | null_pages: Array; 12 | min_values: Array; 13 | max_values: Array; 14 | boundary_order: BoundaryOrder.BoundaryOrder; 15 | null_counts?: Array; 16 | } 17 | export class ColumnIndex { 18 | public null_pages: Array; 19 | public min_values: Array; 20 | public max_values: Array; 21 | public boundary_order: BoundaryOrder.BoundaryOrder; 22 | public null_counts?: Array; 23 | constructor(args: IColumnIndexArgs) { 24 | if (args != null && args.null_pages != null) { 25 | this.null_pages = args.null_pages; 26 | } 27 | else { 28 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[null_pages] is unset!"); 29 | } 30 | if (args != null && args.min_values != null) { 31 | this.min_values = args.min_values; 32 | } 33 | else { 34 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required 
/**
 * Constructor arguments for {@link ColumnIndex}.
 *
 * `null_counts` entries may be plain numbers or Int64 instances; the
 * constructor converts every entry to Int64.
 */
export interface IColumnIndexArgs {
    null_pages: Array<boolean>;
    min_values: Array<Buffer>;
    max_values: Array<Buffer>;
    boundary_order: BoundaryOrder.BoundaryOrder;
    null_counts?: Array<number | Int64>;
}

/**
 * Thrift struct `ColumnIndex`.
 *
 * Fields 1-4 (`null_pages`, `min_values`, `max_values`, `boundary_order`) are
 * required; field 5 (`null_counts`) is optional.
 */
export class ColumnIndex {
    public null_pages: Array<boolean>;
    public min_values: Array<Buffer>;
    public max_values: Array<Buffer>;
    public boundary_order: BoundaryOrder.BoundaryOrder;
    public null_counts?: Array<Int64>;

    /**
     * @param args field values; all required fields must be non-null
     * @throws thrift.Thrift.TProtocolException when a required field is unset
     */
    constructor(args: IColumnIndexArgs) {
        if (args != null && args.null_pages != null) {
            this.null_pages = args.null_pages;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[null_pages] is unset!");
        }
        if (args != null && args.min_values != null) {
            this.min_values = args.min_values;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[min_values] is unset!");
        }
        if (args != null && args.max_values != null) {
            this.max_values = args.max_values;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[max_values] is unset!");
        }
        if (args != null && args.boundary_order != null) {
            this.boundary_order = args.boundary_order;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[boundary_order] is unset!");
        }
        if (args != null && args.null_counts != null) {
            // Unary plus turns both numbers and Int64 instances into a number first.
            this.null_counts = args.null_counts.map(c => new Int64(+c));
        }
    }

    /**
     * Serialises this struct to `output`, writing every non-null field in
     * field-id order followed by the field-stop marker.
     */
    public write(output: thrift.TProtocol): void {
        output.writeStructBegin("ColumnIndex");
        if (this.null_pages != null) {
            output.writeFieldBegin("null_pages", thrift.Thrift.Type.LIST, 1);
            output.writeListBegin(thrift.Thrift.Type.BOOL, this.null_pages.length);
            this.null_pages.forEach((value: boolean): void => {
                output.writeBool(value);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        if (this.min_values != null) {
            output.writeFieldBegin("min_values", thrift.Thrift.Type.LIST, 2);
            output.writeListBegin(thrift.Thrift.Type.STRING, this.min_values.length);
            this.min_values.forEach((value: Buffer): void => {
                output.writeBinary(value);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        if (this.max_values != null) {
            output.writeFieldBegin("max_values", thrift.Thrift.Type.LIST, 3);
            output.writeListBegin(thrift.Thrift.Type.STRING, this.max_values.length);
            this.max_values.forEach((value: Buffer): void => {
                output.writeBinary(value);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        if (this.boundary_order != null) {
            output.writeFieldBegin("boundary_order", thrift.Thrift.Type.I32, 4);
            output.writeI32(this.boundary_order);
            output.writeFieldEnd();
        }
        if (this.null_counts != null) {
            output.writeFieldBegin("null_counts", thrift.Thrift.Type.LIST, 5);
            output.writeListBegin(thrift.Thrift.Type.I64, this.null_counts.length);
            this.null_counts.forEach((value: Int64): void => {
                output.writeI64(value);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        output.writeFieldStop();
        output.writeStructEnd();
    }

    /**
     * Reads one list from `input`: list header, `size` items via `readItem`,
     * then the list end marker.
     */
    private static readList<T>(input: thrift.TProtocol, readItem: () => T): Array<T> {
        const items: Array<T> = new Array<T>();
        const metadata: thrift.TList = input.readListBegin();
        const size: number = metadata.size;
        for (let i: number = 0; i < size; i++) {
            items.push(readItem());
        }
        input.readListEnd();
        return items;
    }

    /**
     * Deserialises a ColumnIndex from `input`.
     *
     * Fields with an unknown id, or a known id carrying an unexpected wire
     * type, are skipped.
     *
     * @throws thrift.Thrift.TProtocolException when any required field was not
     *         present in the input
     */
    public static read(input: thrift.TProtocol): ColumnIndex {
        input.readStructBegin();
        // Built up field by field, so it cannot be typed as IColumnIndexArgs yet.
        let _args: any = {};
        while (true) {
            const ret: thrift.TField = input.readFieldBegin();
            const fieldType: thrift.Thrift.Type = ret.ftype;
            const fieldId: number = ret.fid;
            if (fieldType === thrift.Thrift.Type.STOP) {
                break;
            }
            switch (fieldId) {
                case 1:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        _args.null_pages = ColumnIndex.readList<boolean>(input, () => input.readBool());
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 2:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        _args.min_values = ColumnIndex.readList<Buffer>(input, () => input.readBinary());
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 3:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        _args.max_values = ColumnIndex.readList<Buffer>(input, () => input.readBinary());
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 4:
                    if (fieldType === thrift.Thrift.Type.I32) {
                        const order: BoundaryOrder.BoundaryOrder = input.readI32();
                        _args.boundary_order = order;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 5:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        _args.null_counts = ColumnIndex.readList<Int64>(input, () => input.readI64());
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                default: {
                    input.skip(fieldType);
                }
            }
            input.readFieldEnd();
        }
        input.readStructEnd();
        if (_args.null_pages !== undefined && _args.min_values !== undefined && _args.max_values !== undefined && _args.boundary_order !== undefined) {
            return new ColumnIndex(_args);
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read ColumnIndex from input");
        }
    }
}
/** Appends every element of `source` to `target`, one element at a time. */
function appendAll<T>(target: T[], source: T[]): void {
  // Deliberately not `push.apply(target, source)`: spreading a very large
  // array into call arguments can overflow the call stack.
  for (const item of source) {
    target.push(item);
  }
}

/**
 * 'Shred' a record into a list of <value, repetition_level, definition_level>
 * tuples per column using the Google Dremel Algorithm.
 *
 * The buffer argument must point to an object into which the shredded record
 * will be returned. You may re-use the buffer for repeated calls to this function
 * to append to an existing buffer, as long as the schema is unchanged.
 *
 * The format in which the shredded records will be stored in the buffer is as
 * follows:
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *          dlevels: [d1, d2, .. dN],
 *          rlevels: [r1, r2, .. rN],
 *          values: [v1, v2, .. vN],
 *          count: N,
 *        }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 * Column keys are `field.path.join()`. The record is shredded into a scratch
 * structure first, so the buffer is only touched when shredding succeeded.
 *
 * @param schema schema providing `fields` and `fieldList`
 * @param record the record to shred
 * @param buffer target buffer; initialised here when it has no `columnData`
 *               or no `rowCount` yet
 * @throws Error when a required field is missing or a non-repeated field
 *         carries more than one value
 */
export function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {
  /* shred the record, this may raise an exception */
  const recordShredded: Record<string, ParquetData> = {};
  for (const field of schema.fieldList) {
    recordShredded[field.path.join()] = {
      dlevels: [],
      rlevels: [],
      values: [],
      count: 0
    };
  }

  shredRecordInternal(schema.fields, record, recordShredded, 0, 0);

  /* if no error during shredding, add the shredded record to the buffer */
  if (!('columnData' in buffer) || !('rowCount' in buffer)) {
    buffer.rowCount = 0;
    buffer.columnData = {};

    for (const field of schema.fieldList) {
      const cd: ParquetData = {
        dlevels: [],
        rlevels: [],
        values: [],
        count: 0
      };
      buffer.columnData[field.path.join()] = cd;
    }
  }

  buffer.rowCount += 1;
  for (const field of schema.fieldList) {
    const key = field.path.join();
    const target = buffer.columnData[key];
    const source = recordShredded[key];

    appendAll(target.rlevels, source.rlevels);
    appendAll(target.dlevels, source.dlevels);
    appendAll(target.values, source.values);
    target.count += source.count;
  }
}

/**
 * Recursive worker for {@link shredRecord}: walks `fields`, descending into
 * nested fields, and appends levels/values to the matching entry of `data`.
 *
 * @param fields field definitions of the current nesting level, keyed by name
 * @param record the (sub-)record for this level, or null when the parent is absent
 * @param data per-column accumulators keyed by `field.path.join()`
 * @param rlvl repetition level to record for the first value of each field
 * @param dlvl definition level to record when a field has no value
 */
function shredRecordInternal(
  fields: Record<string, ParquetField>,
  record: any,
  data: Record<string, ParquetData>,
  rlvl: number,
  dlvl: number
): void {
  for (const fieldName of Object.keys(fields)) {
    const field = fields[fieldName];
    const fieldType = field.originalType || field.primitiveType;

    // fetch values: a scalar is treated as a one-element list
    let values: any[] = [];
    if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) {
      const raw = record[fieldName];
      values = Array.isArray(raw) ? raw : [raw];
    }

    // check values
    if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') {
      throw new Error(`missing required field: ${field.name}`);
    }

    if (values.length > 1 && field.repetitionType !== 'REPEATED') {
      throw new Error(`too many values for field: ${field.name}`);
    }

    // push null: no value is stored, only the levels inherited from the parent
    if (values.length === 0) {
      if (field.isNested) {
        shredRecordInternal(
          field.fields,
          null,
          data,
          rlvl,
          dlvl);
      } else {
        const column = data[field.path.join()];
        column.rlevels.push(rlvl);
        column.dlevels.push(dlvl);
        column.count += 1;
      }
      continue;
    }

    // push values
    for (let i = 0; i < values.length; ++i) {
      // the first value keeps the inherited level, later ones repeat at this field
      const repetitionLevel = i === 0 ? rlvl : field.rLevelMax;

      if (field.isNested) {
        shredRecordInternal(
          field.fields,
          values[i],
          data,
          repetitionLevel,
          field.dLevelMax);
      } else {
        const column = data[field.path.join()];
        column.values.push(Types.toPrimitive(fieldType, values[i]));
        column.rlevels.push(repetitionLevel);
        column.dlevels.push(field.dLevelMax);
        column.count += 1;
      }
    }
  }
}

/**
 * 'Materialize' a list of <value, repetition_level, definition_level>
 * tuples back to nested records (objects/arrays) using the Google Dremel
 * Algorithm.
 *
 * The buffer argument must point to an object with the following structure (i.e.
 * the same structure that is produced by shredRecord):
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *          dlevels: [d1, d2, .. dN],
 *          rlevels: [r1, r2, .. rN],
 *          values: [v1, v2, .. vN],
 *          count: N,
 *        }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 * @param schema schema used to resolve each column key via `findField` and
 *               `findFieldBranch`
 * @param buffer shredded column data
 * @returns `buffer.rowCount` records
 */
export function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {
  const records: ParquetRecord[] = [];
  for (let i = 0; i < buffer.rowCount; ++i) {
    records.push({});
  }

  for (const k in buffer.columnData) {
    const field = schema.findField(k);
    const fieldBranch = schema.findFieldBranch(k);
    const column = buffer.columnData[k];
    const values = column.values[Symbol.iterator]();

    // rLevels[n] counts entries seen at repetition level n;
    // rLevels[0] is therefore the 1-based index of the current record.
    // tslint:disable-next-line:prefer-array-literal
    const rLevels: number[] = new Array(field.rLevelMax + 1);
    rLevels.fill(0);

    for (let i = 0; i < column.count; ++i) {
      const dLevel = column.dlevels[i];
      const rLevel = column.rlevels[i];

      rLevels[rLevel]++;
      rLevels.fill(0, rLevel + 1);

      // only fully defined entries consume an element of `values`
      let value = null;
      if (dLevel === field.dLevelMax) {
        value = Types.fromPrimitive(
          field.originalType || field.primitiveType,
          values.next().value);
      }

      materializeRecordField(
        records[rLevels[0] - 1],
        fieldBranch,
        rLevels.slice(1),
        dLevel,
        value);
    }
  }

  return records;
}

/**
 * Stores `value` in `record` at the position described by `branch`, creating
 * intermediate objects and arrays on the way down.
 *
 * Nothing is written when `dLevel` is below the `dLevelMax` of the current
 * branch node.
 *
 * @param record object to write into (mutated)
 * @param branch remaining path of fields, outermost first
 * @param rLevels array indices to use for the REPEATED nodes still ahead
 * @param dLevel definition level of the entry being materialized
 * @param value the value to store at the leaf
 */
function materializeRecordField(record: any, branch: ParquetField[], rLevels: number[], dLevel: number, value: any): void {
  const node = branch[0];

  if (dLevel < node.dLevelMax) {
    return;
  }

  if (branch.length > 1) {
    if (node.repetitionType === 'REPEATED') {
      if (!(node.name in record)) {
        record[node.name] = [];
      }

      while (record[node.name].length < rLevels[0] + 1) {
        record[node.name].push({});
      }

      materializeRecordField(
        record[node.name][rLevels[0]],
        branch.slice(1),
        rLevels.slice(1),
        dLevel,
        value);
    } else {
      record[node.name] = record[node.name] || {};

      materializeRecordField(
        record[node.name],
        branch.slice(1),
        rLevels,
        dLevel,
        value);
    }
  } else {
    if (node.repetitionType === 'REPEATED') {
      if (!(node.name in record)) {
        record[node.name] = [];
      }

      while (record[node.name].length < rLevels[0] + 1) {
        record[node.name].push(null);
      }

      record[node.name][rLevels[0]] = value;
    } else {
      record[node.name] = value;
    }
  }
}
/** Constructor arguments for {@link PageHeader}. */
export interface IPageHeaderArgs {
    type: PageType.PageType;
    uncompressed_page_size: number;
    compressed_page_size: number;
    crc?: number;
    data_page_header?: DataPageHeader.DataPageHeader;
    index_page_header?: IndexPageHeader.IndexPageHeader;
    dictionary_page_header?: DictionaryPageHeader.DictionaryPageHeader;
    data_page_header_v2?: DataPageHeaderV2.DataPageHeaderV2;
}

/**
 * Thrift struct `PageHeader`.
 *
 * Fields 1-3 (`type`, `uncompressed_page_size`, `compressed_page_size`) are
 * required; fields 4-8 are optional.
 */
export class PageHeader {
    public type: PageType.PageType;
    public uncompressed_page_size: number;
    public compressed_page_size: number;
    public crc?: number;
    public data_page_header?: DataPageHeader.DataPageHeader;
    public index_page_header?: IndexPageHeader.IndexPageHeader;
    public dictionary_page_header?: DictionaryPageHeader.DictionaryPageHeader;
    public data_page_header_v2?: DataPageHeaderV2.DataPageHeaderV2;

    /**
     * @param args field values; required fields must be non-null
     * @throws thrift.Thrift.TProtocolException for the first required field
     *         (in field-id order) that is unset
     */
    constructor(args: IPageHeaderArgs) {
        // Required fields: fail fast, in declaration order.
        if (args == null || args.type == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[type] is unset!");
        }
        this.type = args.type;
        if (args.uncompressed_page_size == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[uncompressed_page_size] is unset!");
        }
        this.uncompressed_page_size = args.uncompressed_page_size;
        if (args.compressed_page_size == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[compressed_page_size] is unset!");
        }
        this.compressed_page_size = args.compressed_page_size;

        // Optional fields: copied only when present.
        if (args.crc != null) {
            this.crc = args.crc;
        }
        if (args.data_page_header != null) {
            this.data_page_header = args.data_page_header;
        }
        if (args.index_page_header != null) {
            this.index_page_header = args.index_page_header;
        }
        if (args.dictionary_page_header != null) {
            this.dictionary_page_header = args.dictionary_page_header;
        }
        if (args.data_page_header_v2 != null) {
            this.data_page_header_v2 = args.data_page_header_v2;
        }
    }

    /**
     * Serialises this struct to `output`: every non-null field in field-id
     * order, then the field-stop marker.
     */
    public write(output: thrift.TProtocol): void {
        const putI32 = (name: string, id: number, value: number | undefined): void => {
            if (value != null) {
                output.writeFieldBegin(name, thrift.Thrift.Type.I32, id);
                output.writeI32(value);
                output.writeFieldEnd();
            }
        };
        const putStruct = (name: string, id: number, value: { write(o: thrift.TProtocol): void } | undefined): void => {
            if (value != null) {
                output.writeFieldBegin(name, thrift.Thrift.Type.STRUCT, id);
                value.write(output);
                output.writeFieldEnd();
            }
        };

        output.writeStructBegin("PageHeader");
        putI32("type", 1, this.type);
        putI32("uncompressed_page_size", 2, this.uncompressed_page_size);
        putI32("compressed_page_size", 3, this.compressed_page_size);
        putI32("crc", 4, this.crc);
        putStruct("data_page_header", 5, this.data_page_header);
        putStruct("index_page_header", 6, this.index_page_header);
        putStruct("dictionary_page_header", 7, this.dictionary_page_header);
        putStruct("data_page_header_v2", 8, this.data_page_header_v2);
        output.writeFieldStop();
        output.writeStructEnd();
    }

    /**
     * Deserialises a PageHeader from `input`.
     *
     * A field is consumed only when both its id and wire type match; anything
     * else is skipped.
     *
     * @throws thrift.Thrift.TProtocolException when a required field was not
     *         present in the input
     */
    public static read(input: thrift.TProtocol): PageHeader {
        input.readStructBegin();
        const parsed: any = {};
        for (;;) {
            const header: thrift.TField = input.readFieldBegin();
            const wireType: thrift.Thrift.Type = header.ftype;
            const id: number = header.fid;
            if (wireType === thrift.Thrift.Type.STOP) {
                break;
            }
            const isI32 = wireType === thrift.Thrift.Type.I32;
            const isStruct = wireType === thrift.Thrift.Type.STRUCT;
            if (id === 1 && isI32) {
                parsed.type = input.readI32();
            } else if (id === 2 && isI32) {
                parsed.uncompressed_page_size = input.readI32();
            } else if (id === 3 && isI32) {
                parsed.compressed_page_size = input.readI32();
            } else if (id === 4 && isI32) {
                parsed.crc = input.readI32();
            } else if (id === 5 && isStruct) {
                parsed.data_page_header = DataPageHeader.DataPageHeader.read(input);
            } else if (id === 6 && isStruct) {
                parsed.index_page_header = IndexPageHeader.IndexPageHeader.read(input);
            } else if (id === 7 && isStruct) {
                parsed.dictionary_page_header = DictionaryPageHeader.DictionaryPageHeader.read(input);
            } else if (id === 8 && isStruct) {
                parsed.data_page_header_v2 = DataPageHeaderV2.DataPageHeaderV2.read(input);
            } else {
                input.skip(wireType);
            }
            input.readFieldEnd();
        }
        input.readStructEnd();

        const complete = parsed.type !== undefined
            && parsed.uncompressed_page_size !== undefined
            && parsed.compressed_page_size !== undefined;
        if (!complete) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read PageHeader from input");
        }
        return new PageHeader(parsed);
    }
}
class DataPageHeaderV2 {
    // Thrift struct `DataPageHeaderV2`: fields 1-6 are required, 7-8 optional.
    public num_values: number;
    public num_nulls: number;
    public num_rows: number;
    public encoding: Encoding.Encoding;
    public definition_levels_byte_length: number;
    public repetition_levels_byte_length: number;
    // Defaults to true unless the constructor arguments say otherwise.
    public is_compressed?: boolean = true;
    public statistics?: Statistics.Statistics;

    /**
     * @param args field values; required fields must be non-null
     * @throws thrift.Thrift.TProtocolException for the first required field
     *         (in field-id order) that is unset
     */
    constructor(args: IDataPageHeaderV2Args) {
        // Required fields: fail fast, in declaration order.
        if (args == null || args.num_values == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[num_values] is unset!");
        }
        this.num_values = args.num_values;
        if (args.num_nulls == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[num_nulls] is unset!");
        }
        this.num_nulls = args.num_nulls;
        if (args.num_rows == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[num_rows] is unset!");
        }
        this.num_rows = args.num_rows;
        if (args.encoding == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[encoding] is unset!");
        }
        this.encoding = args.encoding;
        if (args.definition_levels_byte_length == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[definition_levels_byte_length] is unset!");
        }
        this.definition_levels_byte_length = args.definition_levels_byte_length;
        if (args.repetition_levels_byte_length == null) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[repetition_levels_byte_length] is unset!");
        }
        this.repetition_levels_byte_length = args.repetition_levels_byte_length;

        // Optional fields: copied only when present.
        if (args.is_compressed != null) {
            this.is_compressed = args.is_compressed;
        }
        if (args.statistics != null) {
            this.statistics = args.statistics;
        }
    }

    /**
     * Serialises this struct to `output`: every non-null field in field-id
     * order, then the field-stop marker.
     */
    public write(output: thrift.TProtocol): void {
        const putI32 = (name: string, id: number, value: number | undefined): void => {
            if (value != null) {
                output.writeFieldBegin(name, thrift.Thrift.Type.I32, id);
                output.writeI32(value);
                output.writeFieldEnd();
            }
        };

        output.writeStructBegin("DataPageHeaderV2");
        putI32("num_values", 1, this.num_values);
        putI32("num_nulls", 2, this.num_nulls);
        putI32("num_rows", 3, this.num_rows);
        putI32("encoding", 4, this.encoding);
        putI32("definition_levels_byte_length", 5, this.definition_levels_byte_length);
        putI32("repetition_levels_byte_length", 6, this.repetition_levels_byte_length);
        if (this.is_compressed != null) {
            output.writeFieldBegin("is_compressed", thrift.Thrift.Type.BOOL, 7);
            output.writeBool(this.is_compressed);
            output.writeFieldEnd();
        }
        if (this.statistics != null) {
            output.writeFieldBegin("statistics", thrift.Thrift.Type.STRUCT, 8);
            this.statistics.write(output);
            output.writeFieldEnd();
        }
        output.writeFieldStop();
        output.writeStructEnd();
    }

    /**
     * Deserialises a DataPageHeaderV2 from `input`.
     *
     * A field is consumed only when both its id and wire type match; anything
     * else is skipped.
     *
     * @throws thrift.Thrift.TProtocolException when a required field was not
     *         present in the input
     */
    public static read(input: thrift.TProtocol): DataPageHeaderV2 {
        // Field ids 1-6 are all I32 and map straight onto an argument name.
        const i32Fields: { [id: number]: string } = {
            1: "num_values",
            2: "num_nulls",
            3: "num_rows",
            4: "encoding",
            5: "definition_levels_byte_length",
            6: "repetition_levels_byte_length",
        };

        input.readStructBegin();
        const parsed: any = {};
        for (;;) {
            const header: thrift.TField = input.readFieldBegin();
            const wireType: thrift.Thrift.Type = header.ftype;
            const id: number = header.fid;
            if (wireType === thrift.Thrift.Type.STOP) {
                break;
            }
            const i32Name = i32Fields[id];
            if (i32Name !== undefined && wireType === thrift.Thrift.Type.I32) {
                parsed[i32Name] = input.readI32();
            } else if (id === 7 && wireType === thrift.Thrift.Type.BOOL) {
                parsed.is_compressed = input.readBool();
            } else if (id === 8 && wireType === thrift.Thrift.Type.STRUCT) {
                parsed.statistics = Statistics.Statistics.read(input);
            } else {
                input.skip(wireType);
            }
            input.readFieldEnd();
        }
        input.readStructEnd();

        const complete = parsed.num_values !== undefined
            && parsed.num_nulls !== undefined
            && parsed.num_rows !== undefined
            && parsed.encoding !== undefined
            && parsed.definition_levels_byte_length !== undefined
            && parsed.repetition_levels_byte_length !== undefined;
        if (!complete) {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read DataPageHeaderV2 from input");
        }
        return new DataPageHeaderV2(parsed);
    }
}
repetition_type?: FieldRepetitionType.FieldRepetitionType; 28 | public name: string; 29 | public num_children?: number; 30 | public converted_type?: ConvertedType.ConvertedType; 31 | public scale?: number; 32 | public precision?: number; 33 | public field_id?: number; 34 | public logicalType?: LogicalType.LogicalType; 35 | constructor(args: ISchemaElementArgs) { 36 | if (args != null && args.type != null) { 37 | this.type = args.type; 38 | } 39 | if (args != null && args.type_length != null) { 40 | this.type_length = args.type_length; 41 | } 42 | if (args != null && args.repetition_type != null) { 43 | this.repetition_type = args.repetition_type; 44 | } 45 | if (args != null && args.name != null) { 46 | this.name = args.name; 47 | } 48 | else { 49 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[name] is unset!"); 50 | } 51 | if (args != null && args.num_children != null) { 52 | this.num_children = args.num_children; 53 | } 54 | if (args != null && args.converted_type != null) { 55 | this.converted_type = args.converted_type; 56 | } 57 | if (args != null && args.scale != null) { 58 | this.scale = args.scale; 59 | } 60 | if (args != null && args.precision != null) { 61 | this.precision = args.precision; 62 | } 63 | if (args != null && args.field_id != null) { 64 | this.field_id = args.field_id; 65 | } 66 | if (args != null && args.logicalType != null) { 67 | this.logicalType = args.logicalType; 68 | } 69 | } 70 | public write(output: thrift.TProtocol): void { 71 | output.writeStructBegin("SchemaElement"); 72 | if (this.type != null) { 73 | output.writeFieldBegin("type", thrift.Thrift.Type.I32, 1); 74 | output.writeI32(this.type); 75 | output.writeFieldEnd(); 76 | } 77 | if (this.type_length != null) { 78 | output.writeFieldBegin("type_length", thrift.Thrift.Type.I32, 2); 79 | output.writeI32(this.type_length); 80 | output.writeFieldEnd(); 81 | } 82 | if (this.repetition_type != null) { 83 | 
output.writeFieldBegin("repetition_type", thrift.Thrift.Type.I32, 3); 84 | output.writeI32(this.repetition_type); 85 | output.writeFieldEnd(); 86 | } 87 | if (this.name != null) { 88 | output.writeFieldBegin("name", thrift.Thrift.Type.STRING, 4); 89 | output.writeString(this.name); 90 | output.writeFieldEnd(); 91 | } 92 | if (this.num_children != null) { 93 | output.writeFieldBegin("num_children", thrift.Thrift.Type.I32, 5); 94 | output.writeI32(this.num_children); 95 | output.writeFieldEnd(); 96 | } 97 | if (this.converted_type != null) { 98 | output.writeFieldBegin("converted_type", thrift.Thrift.Type.I32, 6); 99 | output.writeI32(this.converted_type); 100 | output.writeFieldEnd(); 101 | } 102 | if (this.scale != null) { 103 | output.writeFieldBegin("scale", thrift.Thrift.Type.I32, 7); 104 | output.writeI32(this.scale); 105 | output.writeFieldEnd(); 106 | } 107 | if (this.precision != null) { 108 | output.writeFieldBegin("precision", thrift.Thrift.Type.I32, 8); 109 | output.writeI32(this.precision); 110 | output.writeFieldEnd(); 111 | } 112 | if (this.field_id != null) { 113 | output.writeFieldBegin("field_id", thrift.Thrift.Type.I32, 9); 114 | output.writeI32(this.field_id); 115 | output.writeFieldEnd(); 116 | } 117 | if (this.logicalType != null) { 118 | output.writeFieldBegin("logicalType", thrift.Thrift.Type.STRUCT, 10); 119 | this.logicalType.write(output); 120 | output.writeFieldEnd(); 121 | } 122 | output.writeFieldStop(); 123 | output.writeStructEnd(); 124 | return; 125 | } 126 | public static read(input: thrift.TProtocol): SchemaElement { 127 | input.readStructBegin(); 128 | let _args: any = {}; 129 | while (true) { 130 | const ret: thrift.TField = input.readFieldBegin(); 131 | const fieldType: thrift.Thrift.Type = ret.ftype; 132 | const fieldId: number = ret.fid; 133 | if (fieldType === thrift.Thrift.Type.STOP) { 134 | break; 135 | } 136 | switch (fieldId) { 137 | case 1: 138 | if (fieldType === thrift.Thrift.Type.I32) { 139 | const value_1: Type.Type = 
input.readI32(); 140 | _args.type = value_1; 141 | } 142 | else { 143 | input.skip(fieldType); 144 | } 145 | break; 146 | case 2: 147 | if (fieldType === thrift.Thrift.Type.I32) { 148 | const value_2: number = input.readI32(); 149 | _args.type_length = value_2; 150 | } 151 | else { 152 | input.skip(fieldType); 153 | } 154 | break; 155 | case 3: 156 | if (fieldType === thrift.Thrift.Type.I32) { 157 | const value_3: FieldRepetitionType.FieldRepetitionType = input.readI32(); 158 | _args.repetition_type = value_3; 159 | } 160 | else { 161 | input.skip(fieldType); 162 | } 163 | break; 164 | case 4: 165 | if (fieldType === thrift.Thrift.Type.STRING) { 166 | const value_4: string = input.readString(); 167 | _args.name = value_4; 168 | } 169 | else { 170 | input.skip(fieldType); 171 | } 172 | break; 173 | case 5: 174 | if (fieldType === thrift.Thrift.Type.I32) { 175 | const value_5: number = input.readI32(); 176 | _args.num_children = value_5; 177 | } 178 | else { 179 | input.skip(fieldType); 180 | } 181 | break; 182 | case 6: 183 | if (fieldType === thrift.Thrift.Type.I32) { 184 | const value_6: ConvertedType.ConvertedType = input.readI32(); 185 | _args.converted_type = value_6; 186 | } 187 | else { 188 | input.skip(fieldType); 189 | } 190 | break; 191 | case 7: 192 | if (fieldType === thrift.Thrift.Type.I32) { 193 | const value_7: number = input.readI32(); 194 | _args.scale = value_7; 195 | } 196 | else { 197 | input.skip(fieldType); 198 | } 199 | break; 200 | case 8: 201 | if (fieldType === thrift.Thrift.Type.I32) { 202 | const value_8: number = input.readI32(); 203 | _args.precision = value_8; 204 | } 205 | else { 206 | input.skip(fieldType); 207 | } 208 | break; 209 | case 9: 210 | if (fieldType === thrift.Thrift.Type.I32) { 211 | const value_9: number = input.readI32(); 212 | _args.field_id = value_9; 213 | } 214 | else { 215 | input.skip(fieldType); 216 | } 217 | break; 218 | case 10: 219 | if (fieldType === thrift.Thrift.Type.STRUCT) { 220 | const value_10: 
LogicalType.LogicalType = LogicalType.LogicalType.read(input); 221 | _args.logicalType = value_10; 222 | } 223 | else { 224 | input.skip(fieldType); 225 | } 226 | break; 227 | default: { 228 | input.skip(fieldType); 229 | } 230 | } 231 | input.readFieldEnd(); 232 | } 233 | input.readStructEnd(); 234 | if (_args.name !== undefined) { 235 | return new SchemaElement(_args); 236 | } 237 | else { 238 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read SchemaElement from input"); 239 | } 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /src/thrift/FileMetaData.ts: -------------------------------------------------------------------------------- 1 | /* tslint:disable */ 2 | /* eslint-disable */ 3 | /* 4 | * Autogenerated by @creditkarma/thrift-typescript v3.7.2 5 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | */ 7 | import Int64 = require("node-int64"); 8 | import * as thrift from "thrift"; 9 | import * as ColumnOrder from "./ColumnOrder"; 10 | import * as KeyValue from "./KeyValue"; 11 | import * as RowGroup from "./RowGroup"; 12 | import * as SchemaElement from "./SchemaElement"; 13 | export interface IFileMetaDataArgs { 14 | version: number; 15 | schema: Array; 16 | num_rows: number | Int64; 17 | row_groups: Array; 18 | key_value_metadata?: Array; 19 | created_by?: string; 20 | column_orders?: Array; 21 | } 22 | export class FileMetaData { 23 | public version: number; 24 | public schema: Array; 25 | public num_rows: Int64; 26 | public row_groups: Array; 27 | public key_value_metadata?: Array; 28 | public created_by?: string; 29 | public column_orders?: Array; 30 | constructor(args: IFileMetaDataArgs) { 31 | if (args != null && args.version != null) { 32 | this.version = args.version; 33 | } 34 | else { 35 | throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[version] is unset!"); 36 | 
/**
 * Constructor arguments accepted by {@link FileMetaData}.
 * `version`, `schema`, `num_rows` and `row_groups` are required.
 */
export interface IFileMetaDataArgs {
    version: number;
    schema: Array<SchemaElement.SchemaElement>;
    /** A plain number is accepted and wrapped in an Int64 by the constructor. */
    num_rows: number | Int64;
    row_groups: Array<RowGroup.RowGroup>;
    key_value_metadata?: Array<KeyValue.KeyValue>;
    created_by?: string;
    column_orders?: Array<ColumnOrder.ColumnOrder>;
}

/**
 * In-memory form of the Thrift struct written as "FileMetaData".
 *
 * Field ids on the wire: version=1, schema=2, num_rows=3, row_groups=4,
 * key_value_metadata=5, created_by=6, column_orders=7.
 */
export class FileMetaData {
    public version: number;
    public schema: Array<SchemaElement.SchemaElement>;
    public num_rows: Int64;
    public row_groups: Array<RowGroup.RowGroup>;
    public key_value_metadata?: Array<KeyValue.KeyValue>;
    public created_by?: string;
    public column_orders?: Array<ColumnOrder.ColumnOrder>;

    /**
     * Copies the members of `args` onto the new instance.
     *
     * @param args values for the struct.
     * @throws thrift.Thrift.TProtocolException when `args` is null/undefined or any of
     *         `version`, `schema`, `num_rows`, `row_groups` is null/undefined.
     */
    constructor(args: IFileMetaDataArgs) {
        if (args != null && args.version != null) {
            this.version = args.version;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[version] is unset!");
        }
        if (args != null && args.schema != null) {
            this.schema = args.schema;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[schema] is unset!");
        }
        if (args != null && args.num_rows != null) {
            // The field is always stored as Int64; plain numbers are wrapped.
            if (typeof args.num_rows === "number") {
                this.num_rows = new Int64(args.num_rows);
            }
            else {
                this.num_rows = args.num_rows;
            }
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[num_rows] is unset!");
        }
        if (args != null && args.row_groups != null) {
            this.row_groups = args.row_groups;
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Required field[row_groups] is unset!");
        }
        if (args != null && args.key_value_metadata != null) {
            this.key_value_metadata = args.key_value_metadata;
        }
        if (args != null && args.created_by != null) {
            this.created_by = args.created_by;
        }
        if (args != null && args.column_orders != null) {
            this.column_orders = args.column_orders;
        }
    }

    /**
     * Serializes this struct to `output`, emitting only the fields that are set.
     * List fields are written as lists of STRUCT, each element writing itself.
     *
     * @param output protocol the struct is written to.
     */
    public write(output: thrift.TProtocol): void {
        output.writeStructBegin("FileMetaData");
        if (this.version != null) {
            output.writeFieldBegin("version", thrift.Thrift.Type.I32, 1);
            output.writeI32(this.version);
            output.writeFieldEnd();
        }
        if (this.schema != null) {
            output.writeFieldBegin("schema", thrift.Thrift.Type.LIST, 2);
            output.writeListBegin(thrift.Thrift.Type.STRUCT, this.schema.length);
            this.schema.forEach((value_1: SchemaElement.SchemaElement): void => {
                value_1.write(output);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        if (this.num_rows != null) {
            output.writeFieldBegin("num_rows", thrift.Thrift.Type.I64, 3);
            output.writeI64(this.num_rows);
            output.writeFieldEnd();
        }
        if (this.row_groups != null) {
            output.writeFieldBegin("row_groups", thrift.Thrift.Type.LIST, 4);
            output.writeListBegin(thrift.Thrift.Type.STRUCT, this.row_groups.length);
            this.row_groups.forEach((value_2: RowGroup.RowGroup): void => {
                value_2.write(output);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        if (this.key_value_metadata != null) {
            output.writeFieldBegin("key_value_metadata", thrift.Thrift.Type.LIST, 5);
            output.writeListBegin(thrift.Thrift.Type.STRUCT, this.key_value_metadata.length);
            this.key_value_metadata.forEach((value_3: KeyValue.KeyValue): void => {
                value_3.write(output);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        if (this.created_by != null) {
            output.writeFieldBegin("created_by", thrift.Thrift.Type.STRING, 6);
            output.writeString(this.created_by);
            output.writeFieldEnd();
        }
        if (this.column_orders != null) {
            output.writeFieldBegin("column_orders", thrift.Thrift.Type.LIST, 7);
            output.writeListBegin(thrift.Thrift.Type.STRUCT, this.column_orders.length);
            this.column_orders.forEach((value_4: ColumnOrder.ColumnOrder): void => {
                value_4.write(output);
            });
            output.writeListEnd();
            output.writeFieldEnd();
        }
        output.writeFieldStop();
        output.writeStructEnd();
        return;
    }

    /**
     * Deserializes a FileMetaData from `input`.
     *
     * Fields with an unknown id, or a known id carrying an unexpected wire type,
     * are skipped.
     *
     * @param input protocol positioned at the start of the struct.
     * @returns the decoded struct.
     * @throws thrift.Thrift.TProtocolException when any of `version`, `schema`,
     *         `num_rows`, `row_groups` was not read.
     */
    public static read(input: thrift.TProtocol): FileMetaData {
        input.readStructBegin();
        let _args: any = {};
        while (true) {
            const ret: thrift.TField = input.readFieldBegin();
            const fieldType: thrift.Thrift.Type = ret.ftype;
            const fieldId: number = ret.fid;
            // STOP terminates the field list; no readFieldEnd follows it.
            if (fieldType === thrift.Thrift.Type.STOP) {
                break;
            }
            switch (fieldId) {
                case 1:
                    if (fieldType === thrift.Thrift.Type.I32) {
                        const value_5: number = input.readI32();
                        _args.version = value_5;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 2:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        const value_6: Array<SchemaElement.SchemaElement> = new Array<SchemaElement.SchemaElement>();
                        const metadata_1: thrift.TList = input.readListBegin();
                        const size_1: number = metadata_1.size;
                        for (let i_1: number = 0; i_1 < size_1; i_1++) {
                            const value_7: SchemaElement.SchemaElement = SchemaElement.SchemaElement.read(input);
                            value_6.push(value_7);
                        }
                        input.readListEnd();
                        _args.schema = value_6;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 3:
                    if (fieldType === thrift.Thrift.Type.I64) {
                        const value_8: Int64 = input.readI64();
                        _args.num_rows = value_8;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 4:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        const value_9: Array<RowGroup.RowGroup> = new Array<RowGroup.RowGroup>();
                        const metadata_2: thrift.TList = input.readListBegin();
                        const size_2: number = metadata_2.size;
                        for (let i_2: number = 0; i_2 < size_2; i_2++) {
                            const value_10: RowGroup.RowGroup = RowGroup.RowGroup.read(input);
                            value_9.push(value_10);
                        }
                        input.readListEnd();
                        _args.row_groups = value_9;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 5:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        const value_11: Array<KeyValue.KeyValue> = new Array<KeyValue.KeyValue>();
                        const metadata_3: thrift.TList = input.readListBegin();
                        const size_3: number = metadata_3.size;
                        for (let i_3: number = 0; i_3 < size_3; i_3++) {
                            const value_12: KeyValue.KeyValue = KeyValue.KeyValue.read(input);
                            value_11.push(value_12);
                        }
                        input.readListEnd();
                        _args.key_value_metadata = value_11;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 6:
                    if (fieldType === thrift.Thrift.Type.STRING) {
                        const value_13: string = input.readString();
                        _args.created_by = value_13;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                case 7:
                    if (fieldType === thrift.Thrift.Type.LIST) {
                        const value_14: Array<ColumnOrder.ColumnOrder> = new Array<ColumnOrder.ColumnOrder>();
                        const metadata_4: thrift.TList = input.readListBegin();
                        const size_4: number = metadata_4.size;
                        for (let i_4: number = 0; i_4 < size_4; i_4++) {
                            const value_15: ColumnOrder.ColumnOrder = ColumnOrder.ColumnOrder.read(input);
                            value_14.push(value_15);
                        }
                        input.readListEnd();
                        _args.column_orders = value_14;
                    }
                    else {
                        input.skip(fieldType);
                    }
                    break;
                default: {
                    input.skip(fieldType);
                }
            }
            input.readFieldEnd();
        }
        input.readStructEnd();
        // All four required fields must have been present on the wire.
        if (_args.version !== undefined && _args.schema !== undefined && _args.num_rows !== undefined && _args.row_groups !== undefined) {
            return new FileMetaData(_args);
        }
        else {
            throw new thrift.Thrift.TProtocolException(thrift.Thrift.TProtocolExceptionType.UNKNOWN, "Unable to read FileMetaData from input");
        }
    }
}