├── .github ├── dependabot.yml └── workflows │ ├── release.yml │ ├── test.yml │ └── web.yml ├── .gitignore ├── .goreleaser.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── cmd ├── gpq │ ├── command │ │ ├── command.go │ │ ├── command_test.go │ │ ├── convert.go │ │ ├── convert_test.go │ │ ├── describe.go │ │ ├── describe_test.go │ │ ├── extract.go │ │ ├── extract_test.go │ │ ├── validate.go │ │ └── version.go │ └── main.go └── wasm │ └── main.go ├── go.mod ├── go.sum ├── internal ├── geo │ ├── geo.go │ └── geo_test.go ├── geojson │ ├── featurereader.go │ ├── featurereader_test.go │ ├── geojson.go │ ├── geojson_test.go │ ├── recordwriter.go │ └── testdata │ │ ├── all-null-geom.geojson │ │ ├── array-id.geojson │ │ ├── array.json │ │ ├── bad-collection.geojson │ │ ├── bad-new-line-delimited.ndgeojson │ │ ├── boolean-id.geojson │ │ ├── empty-collection.geojson │ │ ├── example.geojson │ │ ├── extra-array.geojson │ │ ├── extra-object.geojson │ │ ├── feature.geojson │ │ ├── mismatched-types.geojson │ │ ├── nested-props.geojson │ │ ├── new-line-delimited.ndgeojson │ │ ├── not-geojson.json │ │ ├── null-geom.geojson │ │ ├── number-id.geojson │ │ ├── object-id.geojson │ │ ├── point-geometry.geojson │ │ ├── repeated-props.geojson │ │ ├── sparse-properties.geojson │ │ ├── string-id.geojson │ │ ├── ten-points.geojson │ │ └── with-crs.geojson ├── geoparquet │ ├── featurewriter.go │ ├── filter.go │ ├── filter_test.go │ ├── geoparquet.go │ ├── geoparquet_test.go │ ├── metadata.go │ ├── recordreader.go │ ├── recordwriter.go │ └── writer.go ├── pqutil │ ├── arrow.go │ ├── arrow_test.go │ ├── compression.go │ ├── parquet.go │ ├── parquet_test.go │ ├── transform.go │ └── transform_test.go ├── storage │ ├── blob.go │ ├── blob_test.go │ ├── http.go │ ├── http_test.go │ ├── storage.go │ └── storage_test.go ├── test │ └── test.go ├── testdata │ ├── cases │ │ ├── example-v0.4.0.parquet │ │ ├── example-v1.0.0-beta.1.parquet │ │ ├── example-v1.0.0.parquet │ │ ├── example-v1.1.0-covering.parquet 
│ │ ├── example-v1.1.0-partitioned.parquet │ │ └── example-v1.1.0.parquet │ └── schema │ │ ├── geoparquet.org │ │ └── releases │ │ │ ├── v0.4.0 │ │ │ └── schema.json │ │ │ ├── v1.0.0-beta.1 │ │ │ └── schema.json │ │ │ └── v1.0.0 │ │ │ └── schema.json │ │ └── proj.org │ │ └── schemas │ │ ├── v0.4 │ │ └── projjson.schema.json │ │ ├── v0.5 │ │ └── projjson.schema.json │ │ └── v0.6 │ │ └── projjson.schema.json └── validator │ ├── rules.go │ ├── testdata │ ├── .gitignore │ ├── all-pass-meta │ │ ├── expected.json │ │ └── input.json │ ├── all-pass-minimal │ │ ├── expected.json │ │ └── input.json │ ├── all-pass │ │ ├── expected.json │ │ └── input.json │ ├── bad-bbox-item-type │ │ ├── expected.json │ │ └── input.json │ ├── bad-bbox-length │ │ ├── expected.json │ │ └── input.json │ ├── bad-bbox-type │ │ ├── expected.json │ │ └── input.json │ ├── bad-crs-type │ │ ├── expected.json │ │ └── input.json │ ├── bad-crs │ │ ├── expected.json │ │ └── input.json │ ├── bad-edges │ │ ├── expected.json │ │ └── input.json │ ├── bad-encoding │ │ ├── expected.json │ │ └── input.json │ ├── bad-epoch │ │ ├── expected.json │ │ └── input.json │ ├── bad-geometry-types │ │ ├── expected.json │ │ └── input.json │ ├── bad-metadata-type │ │ ├── expected.json │ │ └── input.json │ ├── bad-orientation │ │ ├── expected.json │ │ └── input.json │ ├── bad-primary-column │ │ ├── expected.json │ │ └── input.json │ ├── complex-types │ │ ├── expected.json │ │ └── input.json │ ├── geometry-correctly-oriented │ │ ├── expected.json │ │ └── input.json │ ├── geometry-incorrectly-oriented │ │ ├── expected.json │ │ └── input.json │ ├── geometry-inside-antimeridian-spanning-bbox │ │ ├── expected.json │ │ └── input.json │ ├── geometry-outside-antimeridian-spanning-bbox │ │ ├── expected.json │ │ └── input.json │ ├── geometry-outside-bbox │ │ ├── expected.json │ │ └── input.json │ ├── geometry-type-not-in-list │ │ ├── expected.json │ │ └── input.json │ ├── missing-columns │ │ ├── expected.json │ │ └── input.json │ ├── 
missing-encoding │ │ ├── expected.json │ │ └── input.json │ ├── missing-geometry-types │ │ ├── expected.json │ │ └── input.json │ ├── missing-primary-column │ │ ├── expected.json │ │ └── input.json │ ├── missing-version │ │ ├── expected.json │ │ └── input.json │ ├── with-empty-geometry │ │ ├── expected.json │ │ └── input.json │ └── with-null-geometry │ │ ├── expected.json │ │ └── input.json │ ├── validator.go │ └── validator_test.go ├── readme.md └── web ├── .gitignore ├── index.html ├── main.js ├── package-lock.json └── package.json /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | 9 | - package-ecosystem: "gomod" 10 | directory: "/" 11 | schedule: 12 | interval: "weekly" 13 | 14 | - package-ecosystem: npm 15 | directory: "/web/" 16 | schedule: 17 | interval: weekly 18 | open-pull-requests-limit: 10 19 | versioning-strategy: increase-if-necessary 20 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | permissions: 9 | contents: write 10 | packages: write 11 | 12 | 13 | jobs: 14 | release: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 20 | - run: git fetch --force --tags 21 | - uses: actions/setup-go@v5.5.0 22 | with: 23 | go-version: '1.24' 24 | - uses: docker/login-action@v3 25 | with: 26 | registry: ghcr.io 27 | username: ${{ github.actor }} 28 | password: ${{ secrets.GITHUB_TOKEN }} 29 | - uses: goreleaser/goreleaser-action@v6 30 | with: 31 | distribution: goreleaser 32 | version: latest 33 | args: release --clean 34 | env: 35 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 36 | HOMEBREW_TAP_GITHUB_TOKEN: ${{ 
secrets.HOMEBREW_TAP_GITHUB_TOKEN }} 37 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-go@v5.5.0 17 | with: 18 | go-version: '1.24' 19 | - uses: golangci/golangci-lint-action@v8 20 | with: 21 | version: v2.1.6 22 | 23 | test: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | - uses: actions/setup-go@v5.5.0 28 | with: 29 | go-version: '1.24' 30 | - run: go test -v ./... 31 | 32 | release-check: 33 | runs-on: ubuntu-latest 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | - name: Checkout 39 | uses: actions/checkout@v4 40 | - name: Setup Go 41 | uses: actions/setup-go@v5.5.0 42 | with: 43 | go-version: '1.24' 44 | - uses: goreleaser/goreleaser-action@v6 45 | with: 46 | distribution: goreleaser 47 | version: latest 48 | args: check 49 | 50 | wasm: 51 | runs-on: ubuntu-latest 52 | steps: 53 | - name: Checkout 54 | uses: actions/checkout@v4 55 | - name: Setup Go 56 | uses: actions/setup-go@v5.5.0 57 | with: 58 | go-version: '1.24' 59 | - name: Build WASM 60 | run: make wasm 61 | 62 | web: 63 | runs-on: ubuntu-latest 64 | steps: 65 | - name: Checkout 66 | uses: actions/checkout@v4 67 | - name: Setup Node 68 | uses: actions/setup-node@v4 69 | with: 70 | node-version: '20' 71 | - name: Setup Go 72 | uses: actions/setup-go@v5.5.0 73 | with: 74 | go-version: '1.24' 75 | - name: Build WASM 76 | run: make wasm 77 | - name: Install Dependencies 78 | working-directory: web 79 | run: npm ci 80 | - name: Run Tests 81 | working-directory: web 82 | run: npm test 83 | -------------------------------------------------------------------------------- 
/.github/workflows/web.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Web Page 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | deploy: 19 | environment: 20 | name: github-pages 21 | url: ${{ steps.deployment.outputs.page_url }} 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | - name: Setup Go 27 | uses: actions/setup-go@v5.5.0 28 | with: 29 | go-version: '1.24' # keep in sync with test.yml and release.yml; the Makefile copies wasm_exec.js from GOROOT/lib/wasm, which exists only in Go 1.24+ 30 | - name: Build WASM 31 | run: make wasm 32 | - name: Upload artifact 33 | uses: actions/upload-pages-artifact@v3 34 | with: 35 | path: web 36 | - name: Deploy to GitHub Pages 37 | id: deployment 38 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/ 2 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://goreleaser.com/static/schema.json 2 | version: 2 3 | 4 | before: 5 | hooks: 6 | - go mod tidy 7 | builds: 8 | - id: gpq 9 | main: ./cmd/gpq/ 10 | binary: gpq 11 | env: 12 | - CGO_ENABLED=0 13 | goos: 14 | - linux 15 | - windows 16 | - darwin 17 | goarch: 18 | - amd64 19 | - arm64 20 | tags: 21 | - noasm 22 | - id: gpq-wasm 23 | main: ./cmd/wasm/ 24 | binary: gpq 25 | env: 26 | - CGO_ENABLED=0 27 | goos: 28 | - js 29 | goarch: 30 | - wasm 31 | tags: 32 | - noasm 33 | archives: 34 | - name_template: >- 35 | {{ .ProjectName }}- 36 | {{- .Os }}- 37 | {{- .Arch }} 38 | dockers: 39 | - image_templates: 40 | - "ghcr.io/planetlabs/gpq:{{ .Tag }}" 41 | - "ghcr.io/planetlabs/gpq:v{{ .Major }}" 42 | 
build_flag_templates: # OCI image labels; the license label must agree with the Apache-2.0 source headers and the brew formula license 43 | - --label=org.opencontainers.image.licenses=Apache-2.0 44 | - --label=org.opencontainers.image.version={{ .Version }} 45 | - --label=org.opencontainers.image.revision={{ .FullCommit }} 46 | - --label=org.opencontainers.image.created={{ time "2006-01-02T15:04:05Z07:00" }} 47 | - --label=org.opencontainers.image.url=https://github.com/planetlabs/{{ .ProjectName }} 48 | checksum: 49 | name_template: 'checksums.txt' 50 | snapshot: 51 | version_template: "{{ incpatch .Version }}-next" 52 | changelog: 53 | sort: asc 54 | use: github-native 55 | brews: 56 | - repository: 57 | owner: planetlabs 58 | name: homebrew-tap 59 | token: "{{ .Env.HOMEBREW_TAP_GITHUB_TOKEN }}" 60 | url_template: "https://github.com/planetlabs/{{ .ProjectName }}/releases/download/{{ .Tag }}/{{ .ArtifactName }}" 61 | commit_msg_template: "Brew formula update for {{ .ProjectName }} version {{ .Tag }}" 62 | homepage: "https://github.com/planetlabs/gpq" 63 | description: "Utility for working with GeoParquet." 
64 | license: "Apache-2.0" 65 | test: | 66 | system "#{bin}/gpq version" 67 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:latest as certs 2 | RUN apk --update add ca-certificates 3 | 4 | FROM scratch 5 | COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt 6 | COPY gpq /bin/gpq 7 | ENTRYPOINT ["/bin/gpq"] 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MAKEFLAGS += --warn-undefined-variables 2 | SHELL := /bin/bash -o pipefail -euc 3 | .DEFAULT_GOAL := help 4 | 5 | .PHONY: help 6 | help: 7 | @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) 8 | 9 | .PHONY: wasm 10 | wasm: web/wasm_exec.js ## Build wasm 11 | @GOOS=js GOARCH=wasm go build -tags noasm -o web/gpq.wasm ./cmd/wasm/. 12 | 13 | web/wasm_exec.js: ## Copy the wasm_exec.js file 14 | @cp "$$(go env GOROOT)/lib/wasm/wasm_exec.js" web 15 | 16 | .PHONY: test 17 | test: ## Run the tests 18 | @go test ./... 19 | 20 | .PHONY: fixtures 21 | fixtures: ## Run validator tests and update expected fixtures to match actuals 22 | @go test ./internal/validator/... 
>/dev/null || true 23 | @for f in ./internal/validator/testdata/*/actual.json; \ 24 | do \ 25 | cp "$$f" "$$(echo "$$f" | sed s/actual.json/expected.json/)"; \ 26 | done; 27 | -------------------------------------------------------------------------------- /cmd/gpq/command/command.go: -------------------------------------------------------------------------------- 1 | package command 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io" 8 | "net/url" 9 | "os" 10 | 11 | "github.com/planetlabs/gpq/internal/storage" 12 | ) 13 | 14 | var CLI struct { 15 | Convert ConvertCmd `cmd:"" help:"Convert data from one format to another."` 16 | Validate ValidateCmd `cmd:"" help:"Validate a GeoParquet file."` 17 | Describe DescribeCmd `cmd:"" help:"Describe a GeoParquet file."` 18 | Extract ExtractCmd `cmd:"" help:"Extract columns by name or rows by spatial subsetting."` 19 | Version VersionCmd `cmd:"" help:"Print the version of this program."` 20 | } 21 | 22 | type CommandError struct { 23 | err error 24 | } 25 | 26 | func NewCommandError(format string, a ...any) *CommandError { 27 | return &CommandError{err: fmt.Errorf(format, a...)} 28 | } 29 | 30 | func (e *CommandError) Error() string { 31 | return e.err.Error() 32 | } 33 | 34 | func (e *CommandError) Unwrap() error { 35 | return e.err 36 | } 37 | 38 | func readerFromInput(input string) (storage.ReaderAtSeeker, error) { 39 | if input == "" { 40 | data, err := io.ReadAll(os.Stdin) 41 | if err != nil { 42 | return nil, fmt.Errorf("trouble reading from stdin: %w", err) 43 | } 44 | return bytes.NewReader(data), nil 45 | } 46 | 47 | if u, err := url.Parse(input); err == nil && u.Scheme != "" { 48 | return storage.NewReader(context.Background(), input) 49 | } 50 | 51 | return os.Open(input) 52 | } 53 | 54 | func hasStdin() bool { 55 | stats, err := os.Stdin.Stat() 56 | if err != nil { 57 | return false 58 | } 59 | return stats.Size() > 0 60 | } 61 | 
-------------------------------------------------------------------------------- /cmd/gpq/command/command_test.go: -------------------------------------------------------------------------------- 1 | package command_test 2 | 3 | import ( 4 | "io" 5 | "net/http" 6 | "net/http/httptest" 7 | "os" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/suite" 11 | ) 12 | 13 | type Suite struct { 14 | suite.Suite 15 | originalStdin *os.File 16 | mockStdin *os.File 17 | originalStdout *os.File 18 | mockStdout *os.File 19 | server *httptest.Server 20 | } 21 | 22 | func (s *Suite) SetupTest() { 23 | stdin, err := os.CreateTemp("", "stdin") 24 | s.Require().NoError(err) 25 | s.originalStdin = os.Stdin 26 | s.mockStdin = stdin 27 | os.Stdin = stdin 28 | 29 | stdout, err := os.CreateTemp("", "stdout") 30 | s.Require().NoError(err) 31 | s.originalStdout = os.Stdout 32 | s.mockStdout = stdout 33 | os.Stdout = stdout 34 | 35 | handler := http.FileServer(http.Dir("../../../internal")) 36 | s.server = httptest.NewServer(handler) 37 | } 38 | 39 | func (s *Suite) writeStdin(data []byte) { 40 | _, writeErr := s.mockStdin.Write(data) 41 | s.Require().NoError(writeErr) 42 | _, seekErr := s.mockStdin.Seek(0, 0) 43 | s.Require().NoError(seekErr) 44 | } 45 | 46 | func (s *Suite) readStdout() []byte { 47 | if _, seekErr := s.mockStdout.Seek(0, 0); seekErr != nil { 48 | // assume the file is closed 49 | stdout, err := os.Open(s.mockStdout.Name()) 50 | s.Require().NoError(err) 51 | s.mockStdout = stdout 52 | } 53 | data, err := io.ReadAll(s.mockStdout) 54 | s.Require().NoError(err) 55 | return data 56 | } 57 | 58 | func (s *Suite) TearDownTest() { 59 | os.Stdout = s.originalStdout 60 | os.Stdin = s.originalStdin 61 | 62 | _ = s.mockStdin.Close() 63 | s.NoError(os.Remove(s.mockStdin.Name())) 64 | 65 | _ = s.mockStdout.Close() 66 | s.NoError(os.Remove(s.mockStdout.Name())) 67 | 68 | s.server.Close() 69 | } 70 | 71 | func TestSuite(t *testing.T) { 72 | suite.Run(t, &Suite{}) 73 | } 74 | 
-------------------------------------------------------------------------------- /cmd/gpq/command/convert_test.go: -------------------------------------------------------------------------------- 1 | package command_test 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | 7 | "github.com/apache/arrow/go/v16/parquet/file" 8 | "github.com/planetlabs/gpq/cmd/gpq/command" 9 | "github.com/planetlabs/gpq/internal/geo" 10 | "github.com/planetlabs/gpq/internal/test" 11 | ) 12 | 13 | func (s *Suite) TestConvertGeoParquetToGeoJSONStdout() { 14 | cmd := &command.ConvertCmd{ 15 | From: "auto", 16 | Input: "../../../internal/testdata/cases/example-v1.0.0.parquet", 17 | To: "geojson", 18 | } 19 | 20 | s.Require().NoError(cmd.Run()) 21 | data := s.readStdout() 22 | 23 | collection := &geo.FeatureCollection{} 24 | s.Require().NoError(json.Unmarshal(data, collection)) 25 | s.Len(collection.Features, 5) 26 | } 27 | 28 | func (s *Suite) TestConvertGeoJSONToGeoParquetStdout() { 29 | cmd := &command.ConvertCmd{ 30 | From: "auto", 31 | Input: "../../../internal/geojson/testdata/example.geojson", 32 | To: "parquet", 33 | } 34 | 35 | s.Require().NoError(cmd.Run()) 36 | data := s.readStdout() 37 | 38 | fileReader, err := file.NewParquetReader(bytes.NewReader(data)) 39 | s.Require().NoError(err) 40 | defer func() { _ = fileReader.Close() }() 41 | 42 | s.Equal(int64(5), fileReader.NumRows()) 43 | } 44 | 45 | func (s *Suite) TestConvertGeoParquetToUnknownStdout() { 46 | cmd := &command.ConvertCmd{ 47 | From: "auto", 48 | Input: "../../../internal/testdata/cases/example-v1.0.0.parquet", 49 | } 50 | 51 | s.ErrorContains(cmd.Run(), "when writing to stdout, the --to option must be provided") 52 | } 53 | 54 | func (s *Suite) TestConvertGeoJSONStdinToGeoParquetStdout() { 55 | s.writeStdin([]byte(`{ 56 | "type": "FeatureCollection", 57 | "features": [ 58 | { 59 | "type": "Feature", 60 | "properties": { 61 | "name": "Null Island" 62 | }, 63 | "geometry": { 64 | "type": "Point", 65 | "coordinates": 
[0, 0] 66 | } 67 | } 68 | ] 69 | }`)) 70 | 71 | cmd := &command.ConvertCmd{ 72 | From: "geojson", 73 | To: "geoparquet", 74 | } 75 | 76 | s.Require().NoError(cmd.Run()) 77 | data := s.readStdout() 78 | 79 | fileReader, err := file.NewParquetReader(bytes.NewReader(data)) 80 | s.Require().NoError(err) 81 | defer func() { _ = fileReader.Close() }() 82 | 83 | s.Equal(int64(1), fileReader.NumRows()) 84 | } 85 | 86 | func (s *Suite) TestConvertGeoParquetStdinToGeoJSONStdout() { 87 | s.writeStdin(test.GeoParquetFromJSON(s.T(), `{ 88 | "type": "FeatureCollection", 89 | "features": [ 90 | { 91 | "type": "Feature", 92 | "properties": { 93 | "name": "Null Island" 94 | }, 95 | "geometry": { 96 | "type": "Point", 97 | "coordinates": [0, 0] 98 | } 99 | } 100 | ] 101 | }`)) 102 | 103 | cmd := &command.ConvertCmd{ 104 | From: "geoparquet", 105 | To: "geojson", 106 | } 107 | 108 | s.Require().NoError(cmd.Run()) 109 | data := s.readStdout() 110 | 111 | collection := &geo.FeatureCollection{} 112 | s.Require().NoError(json.Unmarshal(data, collection)) 113 | s.Len(collection.Features, 1) 114 | } 115 | 116 | func (s *Suite) TestConvertUnknownStdinToGeoParquetStdout() { 117 | cmd := &command.ConvertCmd{ 118 | To: "geoparquet", 119 | } 120 | 121 | s.ErrorContains(cmd.Run(), "when reading from stdin, the --from option must be provided") 122 | } 123 | 124 | func (s *Suite) TestConvertGeoParquetUrlToGeoJSONStdout() { 125 | cmd := &command.ConvertCmd{ 126 | Input: s.server.URL + "/testdata/cases/example-v1.0.0.parquet", 127 | To: "geojson", 128 | } 129 | 130 | s.Require().NoError(cmd.Run()) 131 | data := s.readStdout() 132 | 133 | collection := &geo.FeatureCollection{} 134 | s.Require().NoError(json.Unmarshal(data, collection)) 135 | s.Len(collection.Features, 5) 136 | } 137 | -------------------------------------------------------------------------------- /cmd/gpq/command/validate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 
Planet Labs PBC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package command 16 | 17 | import ( 18 | "context" 19 | "encoding/json" 20 | "fmt" 21 | "os" 22 | "strings" 23 | 24 | "github.com/alecthomas/kong" 25 | "github.com/fatih/color" 26 | "github.com/planetlabs/gpq/internal/validator" 27 | ) 28 | 29 | type ValidateCmd struct { 30 | Input string `arg:"" optional:"" name:"input" help:"Path or URL for a GeoParquet file. If not provided, input is read from stdin."` 31 | MetadataOnly bool `help:"Only run rules that apply to file metadata and schema (no data will be scanned)."` 32 | Unpretty bool `help:"No colors in text output, no newlines and indentation in JSON output."` 33 | Format string `help:"Report format. Possible values: ${enum}." 
enum:"text, json" default:"text"` 34 | } 35 | 36 | func (c *ValidateCmd) Run(ctx *kong.Context) error { 37 | input, inputErr := readerFromInput(c.Input) 38 | if inputErr != nil { 39 | return NewCommandError("trouble getting a reader from %q: %w", c.Input, inputErr) 40 | } 41 | 42 | inputName := c.Input 43 | if inputName == "" { 44 | inputName = "<stdin>" // placeholder name handed to the validator when no path or URL was given and input comes from stdin 45 | } 46 | v := validator.New(c.MetadataOnly) 47 | report, err := v.Validate(context.Background(), input, inputName) 48 | if err != nil { 49 | return NewCommandError("validation failed: %w", err) 50 | } 51 | 52 | valid := true 53 | for _, check := range report.Checks { 54 | if !check.Passed { 55 | valid = false 56 | break 57 | } 58 | } 59 | 60 | if c.Format == "json" { 61 | if err := c.formatJSON(report); err != nil { 62 | return NewCommandError("unable to format report as json: %w", err) 63 | } 64 | } else { 65 | if err := c.formatText(report); err != nil { 66 | return NewCommandError("unable to format report: %w", err) 67 | } 68 | } 69 | 70 | if !valid { 71 | ctx.Exit(1) 72 | } 73 | return nil 74 | } 75 | 76 | func (c *ValidateCmd) formatJSON(report *validator.Report) error { 77 | encoder := json.NewEncoder(os.Stdout) 78 | if !c.Unpretty { 79 | encoder.SetIndent("", " ") 80 | encoder.SetEscapeHTML(false) 81 | } 82 | 83 | return encoder.Encode(report) 84 | } 85 | 86 | func (c *ValidateCmd) formatText(report *validator.Report) error { 87 | passed := 0 88 | failed := 0 89 | unrun := 0 90 | for _, check := range report.Checks { 91 | if !check.Run { 92 | unrun++ 93 | } else if check.Passed { 94 | passed++ 95 | } else { 96 | failed++ 97 | } 98 | } 99 | 100 | summaries := []string{ 101 | fmt.Sprintf("Passed %d check%s", passed, maybeS(passed)), 102 | } 103 | if failed > 0 { 104 | summaries = append(summaries, fmt.Sprintf("failed %d check%s", failed, maybeS(failed))) 105 | } 106 | if unrun > 0 { 107 | summaries = append(summaries, fmt.Sprintf("%d check%s not run", unrun, maybeS(unrun))) 108 | } 109 | 110 | if c.Unpretty { 111 
| color.NoColor = true 112 | } 113 | 114 | fmt.Printf("\nSummary: %s.\n\n", strings.Join(summaries, ", ")) 115 | if report.MetadataOnly { 116 | skipped := len(validator.DataScanningRules()) 117 | color.Yellow("Metadata and schema checks only. Skipped %d data scanning check%s.\n\n", skipped, maybeS(skipped)) 118 | } 119 | 120 | passPrefix := " ✓" 121 | failPrefix := " ✗" 122 | unrunPrefix := " !" 123 | reasonPrefix := " ↳" 124 | for _, check := range report.Checks { 125 | if !check.Run { 126 | color.Yellow("%s %s", unrunPrefix, check.Title) 127 | color.Yellow("%s %s", reasonPrefix, "not checked") 128 | continue 129 | } 130 | 131 | if check.Passed { 132 | color.Green("%s %s", passPrefix, check.Title) 133 | continue 134 | } 135 | 136 | color.Red("%s %s", failPrefix, check.Title) 137 | color.Red("%s %s", reasonPrefix, check.Message) 138 | } 139 | fmt.Println() 140 | 141 | return nil 142 | } 143 | 144 | func maybeS(count int) string { 145 | if count == 1 { 146 | return "" 147 | } 148 | return "s" 149 | } 150 | -------------------------------------------------------------------------------- /cmd/gpq/command/version.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Planet Labs PBC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package command 16 | 17 | import "fmt" 18 | 19 | type VersionCmd struct { 20 | Detail bool `help:"Include detail about the commit and build date."` 21 | } 22 | 23 | type VersionInfo struct { 24 | Version string 25 | Commit string 26 | Date string 27 | } 28 | 29 | func (c *VersionCmd) Run(info *VersionInfo) error { 30 | output := info.Version 31 | if c.Detail { 32 | output = fmt.Sprintf("%s (%s %s)", output, info.Commit, info.Date) 33 | } 34 | fmt.Println(output) 35 | return nil 36 | } 37 | -------------------------------------------------------------------------------- /cmd/gpq/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Planet Labs PBC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package main 16 | 17 | import ( 18 | "errors" 19 | 20 | "github.com/alecthomas/kong" 21 | "github.com/planetlabs/gpq/cmd/gpq/command" 22 | ) 23 | 24 | var ( 25 | version = "dev" 26 | commit = "none" 27 | date = "unknown" 28 | ) 29 | 30 | func main() { 31 | ctx := kong.Parse(&command.CLI) 32 | err := ctx.Run(ctx, &command.VersionInfo{Version: version, Commit: commit, Date: date}) 33 | if err == nil { 34 | return 35 | } 36 | var commandError *command.CommandError 37 | if errors.As(err, &commandError) { 38 | err = commandError 39 | } 40 | ctx.FatalIfErrorf(err) 41 | } 42 | -------------------------------------------------------------------------------- /cmd/wasm/main.go: -------------------------------------------------------------------------------- 1 | //go:build js && wasm 2 | 3 | // Copyright 2023 Planet Labs PBC 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | package main 18 | 19 | import ( 20 | "bytes" 21 | "strings" 22 | "syscall/js" 23 | 24 | "github.com/apache/arrow/go/v16/parquet/file" 25 | "github.com/planetlabs/gpq/internal/geojson" 26 | "github.com/planetlabs/gpq/internal/geoparquet" 27 | "github.com/planetlabs/gpq/internal/pqutil" 28 | ) 29 | 30 | var uint8ArrayConstructor = js.Global().Get("Uint8Array") 31 | 32 | const ( 33 | errorKey = "error" 34 | valueKey = "value" 35 | ) 36 | 37 | func returnFromErrorMessage(message string) map[string]any { 38 | return map[string]any{errorKey: message} 39 | } 40 | 41 | func returnFromError(err error) map[string]any { 42 | return returnFromErrorMessage(err.Error()) 43 | } 44 | 45 | func returnFromValue(value any) map[string]any { 46 | return map[string]any{valueKey: value} 47 | } 48 | 49 | var fromParquet = js.FuncOf(func(this js.Value, args []js.Value) any { 50 | if len(args) != 1 { 51 | return returnFromErrorMessage("Must be called with a single argument") 52 | } 53 | if !args[0].InstanceOf(uint8ArrayConstructor) { 54 | return returnFromErrorMessage("Must be called with a Uint8Array") 55 | } 56 | 57 | numBytes := args[0].Length() 58 | data := make([]byte, numBytes) 59 | js.CopyBytesToGo(data, args[0]) 60 | 61 | output := &bytes.Buffer{} 62 | convertErr := geojson.FromParquet(bytes.NewReader(data), output) 63 | if convertErr != nil { 64 | return returnFromError(convertErr) 65 | } 66 | 67 | reader, readerErr := file.NewParquetReader(bytes.NewReader(data)) 68 | if readerErr != nil { 69 | return returnFromError(readerErr) 70 | } 71 | defer func() { _ = reader.Close() }() 72 | 73 | metadata, metadataErr := geoparquet.GetMetadataValue(reader.MetaData().KeyValueMetadata()) 74 | if metadataErr != nil { 75 | return returnFromError(metadataErr) 76 | } 77 | 78 | return returnFromValue(map[string]any{ 79 | "data": output.String(), 80 | "geo": metadata, 81 | "schema": pqutil.ParquetSchemaString(reader.MetaData().Schema), 82 | "records": reader.NumRows(), 83 | }) 84 | }) 85 | 
86 | var toParquet = js.FuncOf(func(this js.Value, args []js.Value) any { 87 | if len(args) != 1 { 88 | return returnFromErrorMessage("Must be called with a single argument") 89 | } 90 | if args[0].Type() != js.TypeString { 91 | return returnFromErrorMessage("Must be called with a string") 92 | } 93 | 94 | input := strings.NewReader(args[0].String()) 95 | output := &bytes.Buffer{} 96 | convertErr := geojson.ToParquet(input, output, &geojson.ConvertOptions{ 97 | MinFeatures: 10, MaxFeatures: 250, Compression: "zstd", 98 | }) 99 | 100 | if convertErr != nil { 101 | return returnFromError(convertErr) 102 | } 103 | 104 | reader, readerErr := file.NewParquetReader(bytes.NewReader(output.Bytes())) 105 | if readerErr != nil { 106 | return returnFromError(readerErr) 107 | } 108 | 109 | metadata, metadataErr := geoparquet.GetMetadataValue(reader.MetaData().KeyValueMetadata()) 110 | if metadataErr != nil { 111 | return returnFromError(metadataErr) 112 | } 113 | 114 | array := uint8ArrayConstructor.New(output.Len()) 115 | js.CopyBytesToJS(array, output.Bytes()) 116 | 117 | return returnFromValue(map[string]any{ 118 | "data": array, 119 | "geo": metadata, 120 | "schema": pqutil.ParquetSchemaString(reader.MetaData().Schema), 121 | "records": reader.NumRows(), 122 | }) 123 | }) 124 | 125 | func main() { 126 | exports := map[string]interface{}{ 127 | "fromParquet": fromParquet, 128 | "toParquet": toParquet, 129 | } 130 | js.Global().Get("Go").Set("exports", exports) 131 | <-make(chan struct{}) 132 | } 133 | -------------------------------------------------------------------------------- /internal/geo/geo_test.go: -------------------------------------------------------------------------------- 1 | package geo 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestBboxIntersectsTrue(t *testing.T) { 11 | box1 := &Bbox{ 12 | Xmin: 10, 13 | Ymin: 20, 14 | Xmax: 30, 15 | Ymax: 40, 16 | } 17 | 18 | 
box2 := &Bbox{ 19 | Xmin: 25, 20 | Ymin: 35, 21 | Xmax: 45, 22 | Ymax: 55, 23 | } 24 | 25 | require.Equal(t, true, box1.Intersects(box2)) 26 | } 27 | 28 | func TestBboxIntersectsFalse(t *testing.T) { 29 | box1 := &Bbox{ 30 | Xmin: -10, 31 | Ymin: 20, 32 | Xmax: -5, 33 | Ymax: 40, 34 | } 35 | 36 | box2 := &Bbox{ 37 | Xmin: -1, 38 | Ymin: 50, 39 | Xmax: 0, 40 | Ymax: 70, 41 | } 42 | 43 | require.Equal(t, false, box1.Intersects(box2)) 44 | } 45 | 46 | func TestBboxIntersectsTouches(t *testing.T) { 47 | box1 := &Bbox{ 48 | Xmin: 10, 49 | Ymin: 20, 50 | Xmax: 30, 51 | Ymax: 40, 52 | } 53 | 54 | box2 := &Bbox{ 55 | Xmin: 30, 56 | Ymin: 20, 57 | Xmax: 40, 58 | Ymax: 40, 59 | } 60 | 61 | require.Equal(t, true, box1.Intersects(box2)) 62 | } 63 | 64 | func TestBboxIntersectsWholeGlobe(t *testing.T) { 65 | box1 := &Bbox{ 66 | Xmin: -180, 67 | Ymin: -90, 68 | Xmax: 180, 69 | Ymax: 90, 70 | } 71 | 72 | box2 := &Bbox{ 73 | Xmin: 10, 74 | Ymin: 10, 75 | Xmax: 30, 76 | Ymax: 30, 77 | } 78 | 79 | require.Equal(t, true, box1.Intersects(box2)) 80 | } 81 | 82 | func TestBboxIntersectsContains(t *testing.T) { 83 | box1 := &Bbox{ 84 | Xmin: 10, 85 | Ymin: 10, 86 | Xmax: 30, 87 | Ymax: 30, 88 | } 89 | 90 | box2 := &Bbox{ 91 | Xmin: 0, 92 | Ymin: 0, 93 | Xmax: 40, 94 | Ymax: 40, 95 | } 96 | 97 | require.Equal(t, true, box1.Intersects(box2)) 98 | } 99 | 100 | func TestBboxIntersectsTrueAntimeridian(t *testing.T) { 101 | box1 := &Bbox{ 102 | Xmin: 170, 103 | Ymin: -10, 104 | Xmax: -165, 105 | Ymax: 10, 106 | } 107 | 108 | box2 := &Bbox{ 109 | Xmin: -180, 110 | Ymin: -5, 111 | Xmax: -170, 112 | Ymax: 15, 113 | } 114 | 115 | require.Equal(t, true, box1.Intersects(box2)) 116 | } 117 | 118 | func TestBboxIntersectsFalseAntimeridian(t *testing.T) { 119 | box1 := &Bbox{ 120 | Xmin: 170, 121 | Ymin: -10, 122 | Xmax: 180, 123 | Ymax: 10, 124 | } 125 | 126 | box2 := &Bbox{ 127 | Xmin: -160, 128 | Ymin: -5, 129 | Xmax: -150, 130 | Ymax: 15, 131 | } 132 | 133 | require.Equal(t, false, 
box1.Intersects(box2)) 134 | } 135 | 136 | func TestNewBboxFromString(t *testing.T) { 137 | bbox, err := NewBboxFromString("-160,-5,-150,15") 138 | assert.NoError(t, err) 139 | assert.Equal(t, -160.0, bbox.Xmin) 140 | assert.Equal(t, -5.0, bbox.Ymin) 141 | assert.Equal(t, -150.0, bbox.Xmax) 142 | assert.Equal(t, 15.0, bbox.Ymax) 143 | } 144 | 145 | func TestNewBboxFromStringErrNotEnoughValues(t *testing.T) { 146 | bbox, err := NewBboxFromString("-160,-5,-150") 147 | assert.ErrorContains(t, err, "please provide 4") 148 | assert.Nil(t, bbox) 149 | } 150 | 151 | func TestNewBboxFromStringErrWrongType(t *testing.T) { 152 | bbox, err := NewBboxFromString("foo,-5,-150,15") 153 | assert.ErrorContains(t, err, "float") 154 | assert.Nil(t, bbox) 155 | } 156 | -------------------------------------------------------------------------------- /internal/geojson/geojson.go: -------------------------------------------------------------------------------- 1 | package geojson 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | 7 | "github.com/apache/arrow/go/v16/parquet" 8 | "github.com/planetlabs/gpq/internal/geo" 9 | "github.com/planetlabs/gpq/internal/geoparquet" 10 | "github.com/planetlabs/gpq/internal/pqutil" 11 | ) 12 | 13 | const primaryColumn = "geometry" 14 | 15 | func GetDefaultMetadata() *geoparquet.Metadata { 16 | return &geoparquet.Metadata{ 17 | Version: geoparquet.Version, 18 | PrimaryColumn: primaryColumn, 19 | Columns: map[string]*geoparquet.GeometryColumn{ 20 | primaryColumn: { 21 | Encoding: "WKB", 22 | GeometryTypes: []string{}, 23 | }, 24 | }, 25 | } 26 | } 27 | 28 | func FromParquet(reader parquet.ReaderAtSeeker, writer io.Writer) error { 29 | recordReader, rrErr := geoparquet.NewRecordReaderFromConfig(&geoparquet.ReaderConfig{ 30 | Reader: reader, 31 | }) 32 | if rrErr != nil { 33 | return rrErr 34 | } 35 | defer func() { _ = recordReader.Close() }() 36 | 37 | geoMetadata := recordReader.Metadata() 38 | 39 | jsonWriter, jsonErr := NewRecordWriter(writer, geoMetadata) 40 | 
if jsonErr != nil { 41 | return jsonErr 42 | } 43 | 44 | for { 45 | record, readErr := recordReader.Read() 46 | if readErr == io.EOF { 47 | break 48 | } 49 | if readErr != nil { 50 | return readErr 51 | } 52 | if err := jsonWriter.Write(record); err != nil { 53 | return err 54 | } 55 | } 56 | 57 | return jsonWriter.Close() 58 | } 59 | 60 | type ConvertOptions struct { 61 | MinFeatures int 62 | MaxFeatures int 63 | Compression string 64 | RowGroupLength int 65 | Metadata string 66 | } 67 | 68 | var defaultOptions = &ConvertOptions{ 69 | MinFeatures: 1, 70 | MaxFeatures: 50, 71 | Compression: "zstd", 72 | } 73 | 74 | func ToParquet(input io.Reader, output io.Writer, convertOptions *ConvertOptions) error { 75 | if convertOptions == nil { 76 | convertOptions = defaultOptions 77 | } 78 | reader := NewFeatureReader(input) 79 | buffer := []*geo.Feature{} 80 | builder := pqutil.NewArrowSchemaBuilder() 81 | featuresRead := 0 82 | 83 | var pqWriterProps *parquet.WriterProperties 84 | var writerOptions []parquet.WriterProperty 85 | if convertOptions.Compression != "" { 86 | compression, err := pqutil.GetCompression(convertOptions.Compression) 87 | if err != nil { 88 | return err 89 | } 90 | writerOptions = append(writerOptions, parquet.WithCompression(compression)) 91 | } 92 | if convertOptions.RowGroupLength > 0 { 93 | writerOptions = append(writerOptions, parquet.WithMaxRowGroupLength(int64(convertOptions.RowGroupLength))) 94 | } 95 | if len(writerOptions) > 0 { 96 | pqWriterProps = parquet.NewWriterProperties(writerOptions...) 
97 | } 98 | 99 | var featureWriter *geoparquet.FeatureWriter 100 | writeBuffered := func() error { 101 | if !builder.Ready() { 102 | return fmt.Errorf("failed to create schema after reading %d features", len(buffer)) 103 | } 104 | if err := builder.AddGeometry(geoparquet.DefaultGeometryColumn, geoparquet.DefaultGeometryEncoding); err != nil { 105 | return err 106 | } 107 | sc, scErr := builder.Schema() 108 | if scErr != nil { 109 | return scErr 110 | } 111 | fw, fwErr := geoparquet.NewFeatureWriter(&geoparquet.WriterConfig{ 112 | Writer: output, 113 | ArrowSchema: sc, 114 | ParquetWriterProps: pqWriterProps, 115 | }) 116 | if fwErr != nil { 117 | return fwErr 118 | } 119 | 120 | for _, buffered := range buffer { 121 | if err := fw.Write(buffered); err != nil { 122 | return err 123 | } 124 | } 125 | featureWriter = fw 126 | return nil 127 | } 128 | 129 | for { 130 | feature, err := reader.Read() 131 | if err == io.EOF { 132 | break 133 | } 134 | if err != nil { 135 | return err 136 | } 137 | featuresRead += 1 138 | if featureWriter == nil { 139 | if err := builder.Add(feature.Properties); err != nil { 140 | return err 141 | } 142 | 143 | if !builder.Ready() { 144 | buffer = append(buffer, feature) 145 | if len(buffer) > convertOptions.MaxFeatures { 146 | return fmt.Errorf("failed to create parquet schema after reading %d features", convertOptions.MaxFeatures) 147 | } 148 | continue 149 | } 150 | 151 | if len(buffer) < convertOptions.MinFeatures-1 { 152 | buffer = append(buffer, feature) 153 | continue 154 | } 155 | 156 | if err := writeBuffered(); err != nil { 157 | return err 158 | } 159 | } 160 | if err := featureWriter.Write(feature); err != nil { 161 | return err 162 | } 163 | } 164 | if featuresRead > 0 { 165 | if featureWriter == nil { 166 | if err := writeBuffered(); err != nil { 167 | return err 168 | } 169 | } 170 | return featureWriter.Close() 171 | } 172 | return nil 173 | } 174 | 
-------------------------------------------------------------------------------- /internal/geojson/recordwriter.go: -------------------------------------------------------------------------------- 1 | package geojson 2 | 3 | import ( 4 | "encoding/json" 5 | "io" 6 | 7 | "github.com/apache/arrow/go/v16/arrow" 8 | "github.com/apache/arrow/go/v16/arrow/array" 9 | orbjson "github.com/paulmach/orb/geojson" 10 | "github.com/planetlabs/gpq/internal/geo" 11 | "github.com/planetlabs/gpq/internal/geoparquet" 12 | ) 13 | 14 | type RecordWriter struct { 15 | geoMetadata *geoparquet.Metadata 16 | writer io.Writer 17 | writing bool 18 | } 19 | 20 | func NewRecordWriter(writer io.Writer, geoMetadata *geoparquet.Metadata) (*RecordWriter, error) { 21 | w := &RecordWriter{writer: writer, geoMetadata: geoMetadata} 22 | return w, nil 23 | } 24 | 25 | var ( 26 | featureCollectionPrefix = []byte(`{"type":"FeatureCollection","features":[`) 27 | arraySeparator = []byte(",") 28 | featureCollectionSuffix = []byte("]}") 29 | ) 30 | 31 | func (w *RecordWriter) Write(record arrow.Record) error { 32 | if !w.writing { 33 | if _, err := w.writer.Write(featureCollectionPrefix); err != nil { 34 | return err 35 | } 36 | w.writing = true 37 | } else { 38 | if _, err := w.writer.Write(arraySeparator); err != nil { 39 | return err 40 | } 41 | } 42 | arr := array.RecordToStructArray(record) 43 | defer arr.Release() 44 | 45 | schema := record.Schema() 46 | for rowNum := 0; rowNum < arr.Len(); rowNum += 1 { 47 | if rowNum > 0 { 48 | if _, err := w.writer.Write(arraySeparator); err != nil { 49 | return err 50 | } 51 | } 52 | 53 | var geometry *orbjson.Geometry 54 | properties := map[string]any{} 55 | for fieldNum := 0; fieldNum < arr.NumField(); fieldNum += 1 { 56 | value := arr.Field(fieldNum).GetOneForMarshal(rowNum) 57 | name := schema.Field(fieldNum).Name 58 | if geomColumn, ok := w.geoMetadata.Columns[name]; ok { 59 | g, decodeErr := geo.DecodeGeometry(value, geomColumn.Encoding) 60 | if decodeErr != 
nil { 61 | return decodeErr 62 | } 63 | if name == w.geoMetadata.PrimaryColumn { 64 | geometry = g 65 | continue 66 | } 67 | properties[name] = g 68 | continue 69 | } 70 | properties[name] = value 71 | } 72 | 73 | feature := map[string]any{ 74 | "type": "Feature", 75 | "properties": properties, 76 | "geometry": geometry, 77 | } 78 | 79 | featureData, jsonErr := json.Marshal(feature) 80 | if jsonErr != nil { 81 | return jsonErr 82 | } 83 | if _, err := w.writer.Write(featureData); err != nil { 84 | return err 85 | } 86 | } 87 | 88 | return nil 89 | } 90 | 91 | func (w *RecordWriter) Close() error { 92 | if w.writing { 93 | if _, err := w.writer.Write(featureCollectionSuffix); err != nil { 94 | return err 95 | } 96 | w.writing = false 97 | } 98 | 99 | closer, ok := w.writer.(io.Closer) 100 | if ok { 101 | return closer.Close() 102 | } 103 | return nil 104 | } 105 | -------------------------------------------------------------------------------- /internal/geojson/testdata/all-null-geom.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": null, 7 | "properties": { 8 | "place": "nowhere" 9 | } 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/array-id.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "id": [ 4 | "bad" 5 | ], 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [ 9 | 1, 10 | 2 11 | ] 12 | }, 13 | "properties": { 14 | "name": "test" 15 | } 16 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/array.json: -------------------------------------------------------------------------------- 1 | [ 2 | "not", "geojson" 3 | ] -------------------------------------------------------------------------------- 
/internal/geojson/testdata/bad-collection.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [ 9 | 0, 10 | 0 11 | ] 12 | }, 13 | "properties": { 14 | "place": "null island" 15 | } 16 | }, 17 | { 18 | "type": "Feature", 19 | "geometry": { 20 | "invalid": true 21 | }, 22 | "properties": { 23 | "place": "bad geometry" 24 | } 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/boolean-id.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "id": true, 4 | "geometry": { 5 | "type": "Point", 6 | "coordinates": [ 7 | 1, 8 | 2 9 | ] 10 | }, 11 | "properties": { 12 | "name": "test" 13 | } 14 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/empty-collection.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [] 4 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/extra-array.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "geometry": { 3 | "coordinates": [ 4 | 1, 5 | 2 6 | ], 7 | "type": "Point" 8 | }, 9 | "type": "Feature", 10 | "extra": [ 11 | "ignore", 12 | [ 13 | "also ignore", 14 | [ 15 | "this too" 16 | ] 17 | ], 18 | { 19 | "complex": [ 20 | true 21 | ] 22 | } 23 | ], 24 | "properties": { 25 | "name": "test" 26 | } 27 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/extra-object.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "geometry": { 3 | "coordinates": [ 4 | 1, 
5 | 2 6 | ], 7 | "type": "Point" 8 | }, 9 | "type": "Feature", 10 | "extra": { 11 | "ignore": "this", 12 | "nested": { 13 | "ignore": "this too", 14 | "and": [ 15 | { 16 | "also": "this" 17 | } 18 | ] 19 | } 20 | }, 21 | "properties": { 22 | "name": "test" 23 | } 24 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/feature.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "geometry": { 3 | "coordinates": [ 4 | 1, 5 | 2 6 | ], 7 | "type": "Point" 8 | }, 9 | "type": "Feature", 10 | "properties": { 11 | "name": "test" 12 | } 13 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/mismatched-types.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [ 9 | 0, 10 | 0 11 | ] 12 | }, 13 | "properties": { 14 | "stringProperty": "A string" 15 | } 16 | }, 17 | { 18 | "type": "Feature", 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [ 22 | 0, 23 | 0 24 | ] 25 | }, 26 | "properties": { 27 | "stringProperty": 42 28 | } 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/nested-props.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [ 9 | 0, 10 | 0 11 | ] 12 | }, 13 | "properties": { 14 | "nested": { 15 | "soup": "chicken", 16 | "salad": "caesar" 17 | } 18 | } 19 | }, 20 | { 21 | "type": "Feature", 22 | "geometry": { 23 | "type": "Point", 24 | "coordinates": [ 25 | 0, 26 | 0 27 | ] 28 | }, 29 | "properties": { 30 | "nested": { 31 | "soup": "tomato", 32 | "salad": 
"jello" 33 | } 34 | } 35 | } 36 | ] 37 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/not-geojson.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Something", 3 | "data": true 4 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/null-geom.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [ 9 | 0, 10 | 0 11 | ] 12 | }, 13 | "properties": { 14 | "place": "null island" 15 | } 16 | }, 17 | { 18 | "type": "Feature", 19 | "geometry": null, 20 | "properties": { 21 | "place": "nowhere" 22 | } 23 | } 24 | ] 25 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/number-id.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "id": 1, 4 | "geometry": { 5 | "type": "Point", 6 | "coordinates": [ 7 | 1, 8 | 2 9 | ] 10 | }, 11 | "properties": { 12 | "name": "test" 13 | } 14 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/object-id.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "id": { 4 | "oops": true 5 | }, 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [ 9 | 1, 10 | 2 11 | ] 12 | }, 13 | "properties": { 14 | "name": "test" 15 | } 16 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/point-geometry.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "coordinates": [1, 2], 3 | "type": "Point" 4 | } 5 | 
-------------------------------------------------------------------------------- /internal/geojson/testdata/repeated-props.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [ 9 | 0, 10 | 0 11 | ] 12 | }, 13 | "properties": { 14 | "numbers": [ 15 | 1, 16 | 2, 17 | 3 18 | ], 19 | "strings": [ 20 | "one", 21 | "two" 22 | ], 23 | "objects": [ 24 | { 25 | "a": 1, 26 | "b": 2 27 | }, 28 | { 29 | "a": 3, 30 | "b": 4 31 | } 32 | ] 33 | } 34 | }, 35 | { 36 | "type": "Feature", 37 | "geometry": { 38 | "type": "Point", 39 | "coordinates": [ 40 | 0, 41 | 0 42 | ] 43 | }, 44 | "properties": { 45 | "numbers": [ 46 | 4, 47 | 5, 48 | 6 49 | ], 50 | "strings": [ 51 | "three", 52 | "four" 53 | ], 54 | "objects": [ 55 | { 56 | "a": 5, 57 | "b": 6 58 | }, 59 | { 60 | "a": 7, 61 | "b": 8 62 | } 63 | ] 64 | } 65 | } 66 | ] 67 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/sparse-properties.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": null, 7 | "properties": { 8 | "first": "one", 9 | "second": null, 10 | "third": null 11 | } 12 | }, 13 | { 14 | "type": "Feature", 15 | "geometry": null, 16 | "properties": { 17 | "first": null, 18 | "second": "two", 19 | "third": null 20 | } 21 | }, 22 | { 23 | "type": "Feature", 24 | "geometry": null, 25 | "properties": { 26 | "first": null, 27 | "second": null, 28 | "third": "three" 29 | } 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /internal/geojson/testdata/string-id.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "id": 
"feature-1", 4 | "geometry": { 5 | "type": "Point", 6 | "coordinates": [ 7 | 1, 8 | 2 9 | ] 10 | }, 11 | "properties": { 12 | "name": "test" 13 | } 14 | } -------------------------------------------------------------------------------- /internal/geojson/testdata/ten-points.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": { 7 | "num": 0 8 | }, 9 | "geometry": { 10 | "type": "Point", 11 | "coordinates": [0, 0] 12 | } 13 | }, 14 | { 15 | "type": "Feature", 16 | "properties": { 17 | "num": 1 18 | }, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [1, 1] 22 | } 23 | }, 24 | { 25 | "type": "Feature", 26 | "properties": { 27 | "num": 2 28 | }, 29 | "geometry": { 30 | "type": "Point", 31 | "coordinates": [2, 2] 32 | } 33 | }, 34 | { 35 | "type": "Feature", 36 | "properties": { 37 | "num": 3 38 | }, 39 | "geometry": { 40 | "type": "Point", 41 | "coordinates": [3, 3] 42 | } 43 | }, 44 | { 45 | "type": "Feature", 46 | "properties": { 47 | "num": 4 48 | }, 49 | "geometry": { 50 | "type": "Point", 51 | "coordinates": [4, 4] 52 | } 53 | }, 54 | { 55 | "type": "Feature", 56 | "properties": { 57 | "num": 5 58 | }, 59 | "geometry": { 60 | "type": "Point", 61 | "coordinates": [5, 5] 62 | } 63 | }, 64 | { 65 | "type": "Feature", 66 | "properties": { 67 | "num": 6 68 | }, 69 | "geometry": { 70 | "type": "Point", 71 | "coordinates": [6, 6] 72 | } 73 | }, 74 | { 75 | "type": "Feature", 76 | "properties": { 77 | "num": 7 78 | }, 79 | "geometry": { 80 | "type": "Point", 81 | "coordinates": [7, 7] 82 | } 83 | }, 84 | { 85 | "type": "Feature", 86 | "properties": { 87 | "num": 8 88 | }, 89 | "geometry": { 90 | "type": "Point", 91 | "coordinates": [8, 8] 92 | } 93 | }, 94 | { 95 | "type": "Feature", 96 | "properties": { 97 | "num": 9 98 | }, 99 | "geometry": { 100 | "type": "Point", 101 | "coordinates": [9, 9] 102 | } 103 | } 104 | ] 105 
| } -------------------------------------------------------------------------------- /internal/geojson/testdata/with-crs.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "name": "demo", 4 | "crs": { 5 | "type": "name", 6 | "properties": { 7 | "name": "urn:ogc:def:crs:OGC:1.3:CRS84" 8 | } 9 | }, 10 | "features": [ 11 | { 12 | "type": "Feature", 13 | "properties": {}, 14 | "geometry": { 15 | "type": "Polygon", 16 | "coordinates": [ 17 | [ 18 | [ 19 | 110.189014185542902, 20 | 22.656808012935379 21 | ], 22 | [ 23 | 110.203307696738648, 24 | 22.668788827097494 25 | ], 26 | [ 27 | 110.213968273838759, 28 | 22.658264452155809 29 | ], 30 | [ 31 | 110.195595156322597, 32 | 22.65188898133686 33 | ], 34 | [ 35 | 110.195595156322597, 36 | 22.65188898133686 37 | ], 38 | [ 39 | 110.189014185542902, 40 | 22.656808012935379 41 | ] 42 | ] 43 | ] 44 | } 45 | } 46 | ] 47 | } -------------------------------------------------------------------------------- /internal/geoparquet/recordreader.go: -------------------------------------------------------------------------------- 1 | package geoparquet 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "slices" 8 | 9 | "github.com/apache/arrow/go/v16/arrow" 10 | "github.com/apache/arrow/go/v16/arrow/memory" 11 | "github.com/apache/arrow/go/v16/parquet" 12 | "github.com/apache/arrow/go/v16/parquet/file" 13 | "github.com/apache/arrow/go/v16/parquet/pqarrow" 14 | "github.com/apache/arrow/go/v16/parquet/schema" 15 | ) 16 | 17 | const ( 18 | defaultReadBatchSize = 1024 19 | ) 20 | 21 | type ReaderConfig struct { 22 | BatchSize int 23 | Reader parquet.ReaderAtSeeker 24 | File *file.Reader 25 | Context context.Context 26 | Columns []int 27 | RowGroups []int 28 | } 29 | 30 | type RecordReader struct { 31 | fileReader *file.Reader 32 | metadata *Metadata 33 | recordReader pqarrow.RecordReader 34 | } 35 | 36 | func NewParquetFileReader(config *ReaderConfig) 
(*file.Reader, error) { 37 | fileReader := config.File 38 | if fileReader == nil { 39 | if config.Reader == nil { 40 | return nil, errors.New("config must include a File or Reader value") 41 | } 42 | fr, frErr := file.NewParquetReader(config.Reader) 43 | if frErr != nil { 44 | return nil, frErr 45 | } 46 | fileReader = fr 47 | } 48 | return fileReader, nil 49 | } 50 | 51 | func NewArrowFileReader(config *ReaderConfig, parquetReader *file.Reader) (*pqarrow.FileReader, error) { 52 | batchSize := config.BatchSize 53 | if batchSize == 0 { 54 | batchSize = defaultReadBatchSize 55 | } 56 | 57 | return pqarrow.NewFileReader(parquetReader, pqarrow.ArrowReadProperties{BatchSize: int64(batchSize)}, memory.DefaultAllocator) 58 | } 59 | 60 | func NewRecordReaderFromConfig(config *ReaderConfig) (*RecordReader, error) { 61 | parquetFileReader, err := NewParquetFileReader(config) 62 | if err != nil { 63 | return nil, fmt.Errorf("could not get ParquetFileReader: %w", err) 64 | } 65 | 66 | arrowFileReader, err := NewArrowFileReader(config, parquetFileReader) 67 | if err != nil { 68 | return nil, fmt.Errorf("could not get ArrowFileReader: %w", err) 69 | } 70 | 71 | geoMetadata, err := GetMetadataFromFileReader(parquetFileReader) 72 | if err != nil { 73 | return nil, fmt.Errorf("could not get geo metadata from file reader: %w", err) 74 | } 75 | 76 | ctx := config.Context 77 | if ctx == nil { 78 | ctx = context.Background() 79 | } 80 | 81 | if config.Columns != nil { 82 | primaryGeomColIdx := parquetFileReader.MetaData().Schema.ColumnIndexByName(geoMetadata.PrimaryColumn) 83 | 84 | if !slices.Contains(config.Columns, primaryGeomColIdx) { 85 | return nil, fmt.Errorf("columns must include primary geometry column '%v' (index %v)", geoMetadata.PrimaryColumn, primaryGeomColIdx) 86 | } 87 | } 88 | 89 | if config.Columns != nil && len(config.Columns) == 0 { 90 | config.Columns = nil 91 | } 92 | 93 | if config.RowGroups != nil && len(config.RowGroups) == 0 { 94 | config.RowGroups = nil 95 | } 
96 | 97 | recordReader, recordErr := arrowFileReader.GetRecordReader(ctx, config.Columns, config.RowGroups) 98 | 99 | if recordErr != nil { 100 | return nil, recordErr 101 | } 102 | 103 | reader := &RecordReader{ 104 | fileReader: arrowFileReader.ParquetReader(), 105 | metadata: geoMetadata, 106 | recordReader: recordReader, 107 | } 108 | return reader, nil 109 | } 110 | 111 | func NewRecordReader(ctx context.Context, arrowFileReader *pqarrow.FileReader, geoMetadata *Metadata, columns []int, rowGroups []int) (*RecordReader, error) { 112 | if columns != nil || len(columns) != 0 { 113 | primaryGeomColIdx := arrowFileReader.ParquetReader().MetaData().Schema.ColumnIndexByName(geoMetadata.PrimaryColumn) 114 | 115 | if !slices.Contains(columns, primaryGeomColIdx) { 116 | return nil, fmt.Errorf("columns (%v) must include primary geometry column '%v' (index %v)", columns, geoMetadata.PrimaryColumn, primaryGeomColIdx) 117 | } 118 | } 119 | 120 | if columns != nil && len(columns) == 0 { 121 | columns = nil 122 | } 123 | 124 | if rowGroups != nil && len(rowGroups) == 0 { 125 | rowGroups = nil 126 | } 127 | 128 | recordReader, recordErr := arrowFileReader.GetRecordReader(ctx, columns, rowGroups) 129 | 130 | if recordErr != nil { 131 | return nil, recordErr 132 | } 133 | 134 | reader := &RecordReader{ 135 | fileReader: arrowFileReader.ParquetReader(), 136 | metadata: geoMetadata, 137 | recordReader: recordReader, 138 | } 139 | return reader, nil 140 | } 141 | 142 | func (r *RecordReader) Read() (arrow.Record, error) { 143 | return r.recordReader.Read() 144 | } 145 | 146 | func (r *RecordReader) Metadata() *Metadata { 147 | return r.metadata 148 | } 149 | 150 | func (r *RecordReader) Schema() *schema.Schema { 151 | return r.fileReader.MetaData().Schema 152 | } 153 | 154 | func (r *RecordReader) ArrowSchema() *arrow.Schema { 155 | return r.recordReader.Schema() 156 | } 157 | 158 | func (r *RecordReader) NumRows() int64 { 159 | return r.fileReader.NumRows() 160 | } 161 | 162 | 
func (r *RecordReader) Close() error { 163 | r.recordReader.Release() 164 | return r.fileReader.Close() 165 | } 166 | -------------------------------------------------------------------------------- /internal/geoparquet/recordwriter.go: -------------------------------------------------------------------------------- 1 | package geoparquet 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | 8 | "github.com/apache/arrow/go/v16/arrow" 9 | "github.com/apache/arrow/go/v16/parquet" 10 | "github.com/apache/arrow/go/v16/parquet/pqarrow" 11 | ) 12 | 13 | type RecordWriter struct { 14 | fileWriter *pqarrow.FileWriter 15 | metadata *Metadata 16 | wroteGeoMetadata bool 17 | } 18 | 19 | func NewRecordWriter(config *WriterConfig) (*RecordWriter, error) { 20 | parquetProps := config.ParquetWriterProps 21 | if parquetProps == nil { 22 | parquetProps = parquet.NewWriterProperties() 23 | } 24 | 25 | arrowProps := config.ArrowWriterProps 26 | if arrowProps == nil { 27 | defaults := pqarrow.DefaultWriterProps() 28 | arrowProps = &defaults 29 | } 30 | 31 | if config.ArrowSchema == nil { 32 | return nil, errors.New("schema is required") 33 | } 34 | 35 | if config.Writer == nil { 36 | return nil, errors.New("writer is required") 37 | } 38 | fileWriter, fileErr := pqarrow.NewFileWriter(config.ArrowSchema, config.Writer, parquetProps, *arrowProps) 39 | if fileErr != nil { 40 | return nil, fileErr 41 | } 42 | 43 | writer := &RecordWriter{ 44 | fileWriter: fileWriter, 45 | metadata: config.Metadata, 46 | } 47 | 48 | return writer, nil 49 | } 50 | 51 | func (w *RecordWriter) AppendKeyValueMetadata(key string, value string) error { 52 | if err := w.fileWriter.AppendKeyValueMetadata(key, value); err != nil { 53 | return err 54 | } 55 | if key == MetadataKey { 56 | w.wroteGeoMetadata = true 57 | } 58 | return nil 59 | } 60 | 61 | func (w *RecordWriter) Write(record arrow.Record) error { 62 | return w.fileWriter.WriteBuffered(record) 63 | } 64 | 65 | func (w *RecordWriter) Close() 
error { 66 | if !w.wroteGeoMetadata { 67 | metadata := w.metadata 68 | if metadata == nil { 69 | metadata = DefaultMetadata() 70 | } 71 | data, err := json.Marshal(metadata) 72 | if err != nil { 73 | return fmt.Errorf("failed to encode %s file metadata", MetadataKey) 74 | } 75 | if err := w.fileWriter.AppendKeyValueMetadata(MetadataKey, string(data)); err != nil { 76 | return fmt.Errorf("failed to append %s file metadata", MetadataKey) 77 | } 78 | 79 | } 80 | return w.fileWriter.Close() 81 | } 82 | -------------------------------------------------------------------------------- /internal/geoparquet/writer.go: -------------------------------------------------------------------------------- 1 | package geoparquet 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/apache/arrow/go/v16/arrow" 7 | "github.com/apache/arrow/go/v16/parquet" 8 | "github.com/apache/arrow/go/v16/parquet/pqarrow" 9 | ) 10 | 11 | type WriterConfig struct { 12 | Writer io.Writer 13 | Metadata *Metadata 14 | ParquetWriterProps *parquet.WriterProperties 15 | ArrowWriterProps *pqarrow.ArrowWriterProperties 16 | ArrowSchema *arrow.Schema 17 | } 18 | -------------------------------------------------------------------------------- /internal/pqutil/arrow_test.go: -------------------------------------------------------------------------------- 1 | package pqutil_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/planetlabs/gpq/internal/pqutil" 8 | "github.com/planetlabs/gpq/internal/test" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestBuilder(t *testing.T) { 13 | cases := []struct { 14 | name string 15 | record map[string]any 16 | schema string 17 | }{ 18 | { 19 | name: "flat map", 20 | record: map[string]any{ 21 | "maybe": true, 22 | "answer": 42, 23 | "small": int32(32), 24 | "pi": 4.13, 25 | "data": []byte{'a', 'b', 'c'}, 26 | "good": "yup", 27 | }, 28 | schema: ` 29 | message { 30 | optional int64 answer (INT (64, true)); 31 | optional binary data; 32 | optional binary 
good (STRING); 33 | optional boolean maybe; 34 | optional double pi; 35 | optional int32 small (INT (32, true)); 36 | } 37 | `, 38 | }, 39 | { 40 | name: "with slices", 41 | record: map[string]any{ 42 | "bools": []any{true, false, true}, 43 | "strings": []any{"chicken", "noodle", "soup"}, 44 | "floats": []any{1.23, 4.56, 7.89}, 45 | "ints": []any{3, 2, 1}, 46 | }, 47 | schema: ` 48 | message { 49 | optional group bools (LIST) { 50 | repeated group list { 51 | optional boolean element; 52 | } 53 | } 54 | optional group floats (LIST) { 55 | repeated group list { 56 | optional double element; 57 | } 58 | } 59 | optional group ints (LIST) { 60 | repeated group list { 61 | optional int64 element (INT (64, true)); 62 | } 63 | } 64 | optional group strings (LIST) { 65 | repeated group list { 66 | optional binary element (STRING); 67 | } 68 | } 69 | } 70 | `, 71 | }, 72 | { 73 | name: "with maps", 74 | record: map[string]any{ 75 | "complex": map[string]any{ 76 | "maybe": true, 77 | "answer": 42, 78 | "small": int32(32), 79 | "pi": 4.13, 80 | "data": []byte{'a', 'b', 'c'}, 81 | "good": "yup", 82 | }, 83 | }, 84 | schema: ` 85 | message { 86 | optional group complex { 87 | optional int64 answer (INT (64, true)); 88 | optional binary data; 89 | optional binary good (STRING); 90 | optional boolean maybe; 91 | optional double pi; 92 | optional int32 small (INT (32, true)); 93 | } 94 | } 95 | `, 96 | }, 97 | { 98 | name: "with slices of maps", 99 | record: map[string]any{ 100 | "things": []any{ 101 | map[string]any{ 102 | "what": "soup", 103 | "cost": 1.00, 104 | }, 105 | map[string]any{ 106 | "what": "car", 107 | "cost": 40000.00, 108 | }, 109 | map[string]any{ 110 | "what": "house", 111 | "cost": 1000000.00, 112 | }, 113 | }, 114 | }, 115 | schema: ` 116 | message { 117 | optional group things (LIST) { 118 | repeated group list { 119 | optional group element { 120 | optional double cost; 121 | optional binary what (STRING); 122 | } 123 | } 124 | } 125 | } 126 | `, 127 | }, 128 
| } 129 | 130 | for i, c := range cases { 131 | t.Run(fmt.Sprintf("%s (case %d)", c.name, i), func(t *testing.T) { 132 | b := pqutil.NewArrowSchemaBuilder() 133 | require.NoError(t, b.Add(c.record)) 134 | s, err := b.Schema() 135 | require.NoError(t, err) 136 | require.NotNil(t, s) 137 | test.AssertArrowSchemaMatches(t, c.schema, s) 138 | }) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /internal/pqutil/compression.go: -------------------------------------------------------------------------------- 1 | package pqutil 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/v16/parquet/compress" 7 | ) 8 | 9 | func GetCompression(codec string) (compress.Compression, error) { 10 | switch codec { 11 | case "uncompressed": 12 | return compress.Codecs.Uncompressed, nil 13 | case "snappy": 14 | return compress.Codecs.Snappy, nil 15 | case "gzip": 16 | return compress.Codecs.Gzip, nil 17 | case "brotli": 18 | return compress.Codecs.Brotli, nil 19 | case "zstd": 20 | return compress.Codecs.Zstd, nil 21 | case "lz4": 22 | return compress.Codecs.Lz4, nil 23 | default: 24 | return compress.Codecs.Uncompressed, fmt.Errorf("invalid compression codec %s", codec) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /internal/pqutil/parquet_test.go: -------------------------------------------------------------------------------- 1 | package pqutil_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/apache/arrow/go/v16/arrow" 8 | "github.com/apache/arrow/go/v16/parquet/pqarrow" 9 | "github.com/planetlabs/gpq/internal/pqutil" 10 | "github.com/planetlabs/gpq/internal/test" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestArrowToParquetString(t *testing.T) { 16 | cases := []struct { 17 | name string 18 | schema *arrow.Schema 19 | expected string 20 | }{ 21 | { 22 | name: "basic", 23 | schema: 
arrow.NewSchema([]arrow.Field{ 24 | {Name: "optional_bytes", Type: arrow.BinaryTypes.Binary, Nullable: true}, 25 | {Name: "optional_float32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, 26 | {Name: "optional_float64", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, 27 | {Name: "optional_int32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, 28 | {Name: "optional_int64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, 29 | {Name: "optional_bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, 30 | {Name: "required_bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: false}, 31 | {Name: "optional_string", Type: arrow.BinaryTypes.String, Nullable: true}, 32 | {Name: "required_fixed_binary", Type: &arrow.FixedSizeBinaryType{ByteWidth: 3}, Nullable: false}, 33 | }, nil), 34 | expected: ` 35 | message { 36 | optional binary optional_bytes; 37 | optional float optional_float32; 38 | optional double optional_float64; 39 | optional int32 optional_int32 (INT (32, true)); 40 | optional int64 optional_int64 (INT (64, true)); 41 | optional boolean optional_bool; 42 | required boolean required_bool; 43 | optional binary optional_string (STRING); 44 | required fixed_len_byte_array (24) required_fixed_binary; 45 | } 46 | `, 47 | }, 48 | { 49 | name: "lists", 50 | schema: arrow.NewSchema([]arrow.Field{ 51 | {Name: "optional_bools", Type: arrow.ListOf(arrow.FixedWidthTypes.Boolean), Nullable: true}, 52 | {Name: "required_nullable_strings", Type: arrow.ListOf(arrow.BinaryTypes.String), Nullable: false}, 53 | }, nil), 54 | expected: ` 55 | message { 56 | optional group optional_bools (LIST) { 57 | repeated group list { 58 | optional boolean element; 59 | } 60 | } 61 | required group required_nullable_strings (LIST) { 62 | repeated group list { 63 | optional binary element (STRING); 64 | } 65 | } 66 | } 67 | `, 68 | }, 69 | { 70 | name: "TODO: ticket this issue with non-nullable list items", 71 | schema: arrow.NewSchema([]arrow.Field{ 72 | {Name: 
"optional_nonnullable_bools", Type: arrow.ListOfNonNullable(arrow.FixedWidthTypes.Boolean), Nullable: false}, 73 | }, nil), 74 | expected: ` 75 | message { 76 | required group optional_nonnullable_bools (LIST) { 77 | repeated group list { 78 | optional boolean element; 79 | } 80 | } 81 | } 82 | `, 83 | }, 84 | { 85 | name: "structs", 86 | schema: arrow.NewSchema([]arrow.Field{ 87 | {Name: "soup", Type: arrow.StructOf( 88 | arrow.Field{Name: "good", Type: arrow.FixedWidthTypes.Boolean, Nullable: false}, 89 | arrow.Field{Name: "helpings", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, 90 | ), Nullable: false}, 91 | }, nil), 92 | expected: ` 93 | message { 94 | required group soup { 95 | required boolean good; 96 | optional double helpings; 97 | } 98 | } 99 | `, 100 | }, 101 | { 102 | name: "lists of structs", 103 | schema: arrow.NewSchema([]arrow.Field{ 104 | {Name: "things", Type: arrow.ListOf(arrow.StructOf( 105 | arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: false}, 106 | arrow.Field{Name: "cost", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, 107 | )), Nullable: true}, 108 | }, nil), 109 | expected: ` 110 | message { 111 | optional group things (LIST) { 112 | repeated group list { 113 | optional group element { 114 | required binary name (STRING); 115 | optional double cost; 116 | } 117 | } 118 | } 119 | } 120 | `, 121 | }, 122 | } 123 | 124 | for i, c := range cases { 125 | t.Run(fmt.Sprintf("%s (case %d)", c.name, i), func(t *testing.T) { 126 | parquetSchema, err := pqarrow.ToParquet(c.schema, nil, pqarrow.DefaultWriterProps()) 127 | require.NoError(t, err) 128 | 129 | assert.Equal(t, test.Tab2Space(test.Dedent(c.expected)), pqutil.ParquetSchemaString(parquetSchema)) 130 | }) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /internal/storage/blob.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | 
"context" 5 | "fmt" 6 | "io" 7 | "strings" 8 | 9 | "gocloud.dev/blob" 10 | _ "gocloud.dev/blob/azureblob" 11 | _ "gocloud.dev/blob/fileblob" 12 | _ "gocloud.dev/blob/gcsblob" 13 | _ "gocloud.dev/blob/s3blob" 14 | "gocloud.dev/gcerrors" 15 | ) 16 | 17 | type BlobReader struct { 18 | ctx context.Context 19 | bucket *blob.Bucket 20 | key string 21 | size int64 22 | offset int64 23 | } 24 | 25 | func NewBlobReader(ctx context.Context, name string) (*BlobReader, error) { 26 | parts := strings.Split(name, "/") 27 | if len(parts) < 4 { 28 | return nil, fmt.Errorf("expected a name in the form :///") 29 | } 30 | var bucketName string 31 | var key string 32 | if parts[0] == "file:" { 33 | bucketName = strings.Join(parts[:len(parts)-1], "/") 34 | key = parts[len(parts)-1] 35 | } else { 36 | bucketName = strings.Join(parts[:3], "/") 37 | key = strings.Join(parts[3:], "/") 38 | } 39 | 40 | bucket, err := blob.OpenBucket(ctx, bucketName) 41 | if err != nil { 42 | return nil, fmt.Errorf("failed to open bucket %s, %w", bucketName, err) 43 | } 44 | 45 | attrs, err := bucket.Attributes(ctx, key) 46 | if err != nil { 47 | return nil, fmt.Errorf("failed to get attributes for %s, %w", name, err) 48 | } 49 | 50 | reader := &BlobReader{ 51 | ctx: ctx, 52 | bucket: bucket, 53 | key: key, 54 | size: attrs.Size, 55 | } 56 | 57 | return reader, nil 58 | } 59 | 60 | func (r *BlobReader) Seek(offset int64, whence int) (int64, error) { 61 | switch whence { 62 | case io.SeekCurrent: 63 | offset = r.offset + offset 64 | case io.SeekEnd: 65 | offset = r.size + offset 66 | } 67 | 68 | if offset < 0 { 69 | return 0, fmt.Errorf("attempt to seek to a negative offset: %d", offset) 70 | } 71 | r.offset = offset 72 | return offset, nil 73 | } 74 | 75 | func (r *BlobReader) ReadAt(data []byte, offset int64) (int, error) { 76 | _, err := r.Seek(offset, io.SeekStart) 77 | if err != nil { 78 | return 0, err 79 | } 80 | return r.readFull(data) 81 | } 82 | 83 | func (r *BlobReader) Read(data []byte) (int, 
error) { 84 | return r.readFull(data) 85 | } 86 | 87 | func (r *BlobReader) readFull(data []byte) (int, error) { 88 | rangeReader, err := r.bucket.NewRangeReader(r.ctx, r.key, r.offset, int64(len(data)), nil) 89 | if err != nil { 90 | return 0, err 91 | } 92 | defer func() { _ = rangeReader.Close() }() 93 | 94 | total := 0 95 | for { 96 | n, err := rangeReader.Read(data[total:]) 97 | total = total + n 98 | r.offset += int64(n) 99 | if total >= len(data) { 100 | break 101 | } 102 | if err != nil { 103 | return total, err 104 | } 105 | } 106 | return total, nil 107 | } 108 | 109 | func (r *BlobReader) Close() error { 110 | if err := r.bucket.Close(); err != nil { 111 | if gcerrors.Code(err) == gcerrors.FailedPrecondition { 112 | // allow mutiple calls to Close 113 | return nil 114 | } 115 | return err 116 | } 117 | return nil 118 | } 119 | -------------------------------------------------------------------------------- /internal/storage/blob_test.go: -------------------------------------------------------------------------------- 1 | package storage_test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io" 8 | "os" 9 | "testing" 10 | 11 | "github.com/planetlabs/gpq/internal/storage" 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func createFile(t *testing.T, data []byte) string { 17 | f, err := os.CreateTemp("", "file.txt") 18 | require.NoError(t, err) 19 | 20 | _, err = f.Write(data) 21 | require.NoError(t, err) 22 | 23 | require.NoError(t, f.Close()) 24 | return f.Name() 25 | } 26 | 27 | func removeFile(t *testing.T, name string) { 28 | require.NoError(t, os.Remove(name)) 29 | } 30 | 31 | func TestBlobReaderReadAll(t *testing.T) { 32 | content := randBytes(t, 1024) 33 | name := createFile(t, content) 34 | defer removeFile(t, name) 35 | 36 | reader, err := storage.NewBlobReader(context.Background(), "file://"+name) 37 | require.NoError(t, err) 38 | 39 | data, err := io.ReadAll(reader) 40 | 
require.NoError(t, err) 41 | 42 | assert.Len(t, data, len(content)) 43 | require.NoError(t, reader.Close()) 44 | } 45 | 46 | func TestBlobReaderReadAt(t *testing.T) { 47 | content := randBytes(t, 1000) 48 | name := createFile(t, content) 49 | defer removeFile(t, name) 50 | 51 | blobReader, err := storage.NewBlobReader(context.Background(), "file://"+name) 52 | require.NoError(t, err) 53 | defer func() { _ = blobReader.Close() }() 54 | 55 | byteReader := bytes.NewReader(content) 56 | 57 | cases := []struct { 58 | name string 59 | offset int 60 | size int 61 | err string 62 | }{ 63 | { 64 | name: "first read", 65 | offset: 700, 66 | size: 50, 67 | }, 68 | { 69 | name: "second read", 70 | offset: 10, 71 | size: 10, 72 | }, 73 | { 74 | name: "offset after end", 75 | offset: len(content) + 10, 76 | size: 10, 77 | err: io.EOF.Error(), 78 | }, 79 | { 80 | name: "offset near end", 81 | offset: len(content) - 10, 82 | size: 20, 83 | err: io.EOF.Error(), 84 | }, 85 | { 86 | name: "offset before start", 87 | offset: -1, 88 | size: 10, 89 | err: "attempt to seek to a negative offset: -1", 90 | }, 91 | } 92 | 93 | for i, c := range cases { 94 | t.Run(fmt.Sprintf("%s (case %d)", c.name, i), func(t *testing.T) { 95 | data := make([]byte, c.size) 96 | read, err := blobReader.ReadAt(data, int64(c.offset)) 97 | if c.err == "" { 98 | require.NoError(t, err) 99 | } 100 | if err != nil { 101 | assert.ErrorContains(t, err, c.err) 102 | } 103 | expected := make([]byte, c.size) 104 | expectedRead, _ := byteReader.ReadAt(expected, int64(c.offset)) 105 | require.Equal(t, expectedRead, read) 106 | assert.Equal(t, expected[:read], data[:read]) 107 | }) 108 | } 109 | } 110 | 111 | func TestBlobReaderSeek(t *testing.T) { 112 | content := randBytes(t, 1000) 113 | name := createFile(t, content) 114 | 115 | blobReader, err := storage.NewBlobReader(context.Background(), "file://"+name) 116 | require.NoError(t, err) 117 | defer func() { _ = blobReader.Close() }() 118 | 119 | byteReader := 
bytes.NewReader(content) 120 | 121 | cases := []struct { 122 | name string 123 | offset int 124 | whence int 125 | err string 126 | }{ 127 | { 128 | name: "seek start", 129 | offset: 700, 130 | whence: io.SeekStart, 131 | }, 132 | { 133 | name: "seek current", 134 | offset: 10, 135 | whence: io.SeekCurrent, 136 | }, 137 | { 138 | name: "seek end", 139 | offset: -10, 140 | whence: io.SeekEnd, 141 | }, 142 | { 143 | name: "offset beyond end", 144 | offset: 10, 145 | whence: io.SeekEnd, 146 | }, 147 | { 148 | name: "offset before start", 149 | offset: -1, 150 | whence: io.SeekStart, 151 | err: "attempt to seek to a negative offset: -1", 152 | }, 153 | } 154 | 155 | for i, c := range cases { 156 | t.Run(fmt.Sprintf("%s (case %d)", c.name, i), func(t *testing.T) { 157 | data := make([]byte, 10) 158 | offset, seekErr := blobReader.Seek(int64(c.offset), c.whence) 159 | if c.err == "" { 160 | require.NoError(t, seekErr) 161 | return 162 | } 163 | if seekErr != nil { 164 | require.ErrorContains(t, seekErr, c.err) 165 | return 166 | } 167 | 168 | total := 0 169 | for { 170 | read, readErr := blobReader.Read(data[total:]) 171 | total += read 172 | if readErr == io.EOF { 173 | break 174 | } 175 | require.NoError(t, readErr) 176 | } 177 | 178 | expectedOffset, _ := byteReader.Seek(int64(c.offset), c.whence) 179 | assert.Equal(t, expectedOffset, offset) 180 | 181 | expected := make([]byte, len(data)) 182 | expectedTotal := 0 183 | for { 184 | read, err := byteReader.Read(expected[expectedTotal:]) 185 | expectedTotal += read 186 | if err == io.EOF { 187 | break 188 | } 189 | require.NoError(t, err) 190 | } 191 | 192 | assert.Equal(t, expectedTotal, total) 193 | assert.Equal(t, expected[:total], data[:total]) 194 | }) 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /internal/storage/http.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "bytes" 5 | 
"fmt"
	"io"
	"net/http"
	"strconv"
	"strings"
)

const (
	// initialHttpRequestSize is the number of bytes requested by the probe
	// made in init.
	initialHttpRequestSize = 512
	// minHttpRequestSize is the smallest range requested when the buffer is
	// refilled.
	minHttpRequestSize = 1024
)

// HttpReader reads a remote resource over HTTP using range requests. The
// most recent response body is kept in buffer; bufferOffset and bufferSize
// describe which part of the resource it holds.
type HttpReader struct {
	url          string
	offset       int64
	size         int64
	client       *http.Client
	buffer       ReaderAtSeeker
	bufferOffset int64
	bufferSize   int64
	// validator is sent as If-Range on later requests when non-empty.
	validator string
}

// NewHttpReader creates a reader for url and issues the initial probe request
// that determines the resource size.
func NewHttpReader(url string) (*HttpReader, error) {
	reader := &HttpReader{
		url:    url,
		client: &http.Client{},
	}
	if err := reader.init(); err != nil {
		return nil, err
	}
	return reader, nil
}

// init requests the first initialHttpRequestSize bytes and buffers whatever
// comes back. If the response has a Content-Range header containing "/", the
// total size is parsed from the part after the slash and a validator is
// recorded; otherwise the body length is used as the size.
func (r *HttpReader) init() error {
	req, err := http.NewRequest(http.MethodGet, r.url, nil)
	if err != nil {
		return err
	}

	// make an initial range request to determine size
	req.Header.Add("Range", fmt.Sprintf("bytes=0-%d", initialHttpRequestSize-1))
	resp, err := r.client.Do(req)
	if err != nil {
		return err
	}
	defer func() { _ = resp.Body.Close() }()
	if !success(resp) {
		return fmt.Errorf("unexpected response from %s: %d", r.url, resp.StatusCode)
	}

	data, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return fmt.Errorf("failed to read response from %s: %w", r.url, readErr)
	}

	r.buffer = bytes.NewReader(data)
	r.bufferSize = int64(len(data))

	str := resp.Header.Get("Content-Range")
	if strings.Contains(str, "/") {
		size, err := strconv.ParseInt(strings.Split(str, "/")[1], 10, 64)
		if err != nil {
			return fmt.Errorf("invalid content-range header from %s: %w", r.url, err)
		}
		r.size = size
		r.validator = validatorFromResponse(resp)
	} else {
		r.size = int64(len(data))
	}
	return nil
}

// success reports whether the response status is in the 2xx range.
func success(response *http.Response) bool {
	return response.StatusCode >= http.StatusOK && response.StatusCode < http.StatusMultipleChoices
}

// validatorFromResponse picks the value to send as If-Range: the ETag when it
// starts with a double quote, otherwise the Last-Modified header (which may
// be empty).
func validatorFromResponse(resp *http.Response) string {
	etag := resp.Header.Get("ETag")
	if etag != "" && etag[0] == '"' {
		return etag
	}

	return resp.Header.Get("Last-Modified")
}

// ReadAt seeks to offset and then calls Read until data is full or Read
// returns an error. It moves the reader's current offset.
func (r *HttpReader) ReadAt(data []byte, offset int64) (int, error) {
	_, err := r.Seek(offset, io.SeekStart)
	if err != nil {
		return 0, err
	}

	total := 0
	for total < len(data) {
		n, err := r.Read(data[total:])
		if err != nil {
			return total + n, err
		}
		total = total + n
	}
	return total, nil
}

// Seek sets the offset for the next Read, interpreting offset relative to the
// start, the current offset, or the resource size according to whence.
// Seeking to a negative position or passing an unknown whence is an error;
// seeking past the end is allowed.
func (r *HttpReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekStart:
		// offset is already absolute
	case io.SeekCurrent:
		offset = r.offset + offset
	case io.SeekEnd:
		offset = r.size + offset
	default:
		return 0, fmt.Errorf("invalid whence: %d", whence)
	}

	if offset < 0 {
		return 0, fmt.Errorf("attempt to seek to a negative offset: %d", offset)
	}
	r.offset = offset
	return offset, nil
}

// Read copies bytes from the current offset into data and advances the
// offset, refilling the buffer with a range request when the offset lies
// outside it. It returns io.EOF once the offset has reached the resource size.
func (r *HttpReader) Read(data []byte) (n int, err error) {
	// At or past the end there is nothing left to fetch; asking the server
	// for a range starting at size would be unsatisfiable.
	if r.offset >= r.size {
		return 0, io.EOF
	}
	// The buffer covers [bufferOffset, bufferOffset+bufferSize); an offset
	// equal to the upper bound is already outside it.
	if r.buffer == nil || r.offset < r.bufferOffset || r.offset >= r.bufferOffset+r.bufferSize {
		if err := r.request(int64(len(data))); err != nil {
			return 0, err
		}
	}
	read, err := r.buffer.ReadAt(data, r.offset-r.bufferOffset)
	r.offset = r.offset + int64(read)
	if err == io.EOF && r.offset < r.size {
		// The buffer ran out before the resource did: drop it so the next
		// Read fetches more, and report a short read instead of EOF.
		r.buffer = nil
		return read, nil
	}
	return read, err
}

// request fetches a range starting at the current offset and makes the
// response body the new buffer. At least minHttpRequestSize bytes are asked
// for. When a validator is known it is sent as If-Range.
func (r *HttpReader) request(size int64) error {
	req, err := http.NewRequest(http.MethodGet, r.url, nil)
	if err != nil {
		return err
	}
	requestSize := size
	if requestSize < minHttpRequestSize {
		requestSize = minHttpRequestSize
	}
	// The last-byte position of an HTTP byte range is inclusive.
	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", r.offset, r.offset+requestSize-1))
	if r.validator != "" {
		req.Header.Set("If-Range", r.validator)
	}

	resp, err := r.client.Do(req)
	if err != nil {
		return err
	}
	defer func() { _ = resp.Body.Close() }()
	if !success(resp) {
		return fmt.Errorf("unexpected response from %s: %d", r.url, resp.StatusCode)
	}

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}

	bufferOffset := r.offset
	switch {
	case resp.StatusCode == http.StatusOK:
		// A 200 reply carries the whole resource rather than the requested
		// range (for example when the If-Range validator no longer matches),
		// so the body starts at byte 0 and defines the size.
		bufferOffset = 0
		r.size = int64(len(data))
	case len(data) == 0:
		// An empty partial body would never advance the offset and callers
		// looping on Read would spin forever.
		return io.ErrUnexpectedEOF
	}

	r.buffer = bytes.NewReader(data)
	r.bufferOffset = bufferOffset
	r.bufferSize = int64(len(data))
	return nil
}

// Close drops the buffer and closes the client's idle connections. It always
// returns nil.
func (r *HttpReader) Close() error {
	r.buffer = nil
	r.client.CloseIdleConnections()
	return nil
}

-------------------------------------------------------------------------------- /internal/storage/http_test.go: -------------------------------------------------------------------------------- 1 | package storage_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "net/http/httptest" 9 | "testing" 10 | "time" 11 | 12 | "github.com/planetlabs/gpq/internal/storage" 13 | "github.com/stretchr/testify/assert" 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func contentUrl(t *testing.T, content []byte) string { 18 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 19 | if r.Method != http.MethodGet { 20 | w.WriteHeader(http.StatusMethodNotAllowed) 21 | return 22 | } 23 | http.ServeContent(w, r, "content.txt", time.Time{}, bytes.NewReader([]byte(content))) 24 | })) 25 | return server.URL 26 | } 27 | 28 | func TestHttpReaderReadAll(t *testing.T) { 29 | content := randBytes(t, 1000) 30 | url := contentUrl(t, content) 31 | 32 | reader, err := storage.NewHttpReader(url) 33 | require.NoError(t, err) 34 | defer func() { _ = reader.Close() }() 35 | 36 | data, err := io.ReadAll(reader) 37 | require.NoError(t, err) 38 | assert.Equal(t, len(content), len(data)) 39 | assert.Equal(t, content, data) 40 | } 41 | 42 | func TestHttpReaderReadAt(t *testing.T) { 43 | content := 
randBytes(t, 1000) 44 | url := contentUrl(t, content) 45 | 46 | httpReader, err := storage.NewHttpReader(url) 47 | require.NoError(t, err) 48 | defer func() { _ = httpReader.Close() }() 49 | 50 | byteReader := bytes.NewReader(content) 51 | 52 | cases := []struct { 53 | name string 54 | offset int 55 | size int 56 | err string 57 | }{ 58 | { 59 | name: "first read", 60 | offset: 700, 61 | size: 50, 62 | }, 63 | { 64 | name: "second read", 65 | offset: 10, 66 | size: 10, 67 | }, 68 | { 69 | name: "offset after end", 70 | offset: len(content) + 10, 71 | size: 10, 72 | err: io.EOF.Error(), 73 | }, 74 | { 75 | name: "offset near end", 76 | offset: len(content) - 10, 77 | size: 20, 78 | err: io.EOF.Error(), 79 | }, 80 | { 81 | name: "offset before start", 82 | offset: -1, 83 | size: 10, 84 | err: "attempt to seek to a negative offset: -1", 85 | }, 86 | } 87 | 88 | for i, c := range cases { 89 | t.Run(fmt.Sprintf("%s (case %d)", c.name, i), func(t *testing.T) { 90 | data := make([]byte, c.size) 91 | read, err := httpReader.ReadAt(data, int64(c.offset)) 92 | if c.err == "" { 93 | require.NoError(t, err) 94 | } 95 | if err != nil { 96 | assert.ErrorContains(t, err, c.err) 97 | } 98 | expected := make([]byte, c.size) 99 | expectedRead, _ := byteReader.ReadAt(expected, int64(c.offset)) 100 | require.Equal(t, expectedRead, read) 101 | assert.Equal(t, expected[:read], data[:read]) 102 | }) 103 | } 104 | } 105 | 106 | func TestHttpReaderSeek(t *testing.T) { 107 | content := randBytes(t, 1000) 108 | url := contentUrl(t, content) 109 | 110 | httpReader, err := storage.NewHttpReader(url) 111 | require.NoError(t, err) 112 | defer func() { _ = httpReader.Close() }() 113 | 114 | byteReader := bytes.NewReader(content) 115 | 116 | cases := []struct { 117 | name string 118 | offset int 119 | whence int 120 | err string 121 | }{ 122 | { 123 | name: "seek start", 124 | offset: 700, 125 | whence: io.SeekStart, 126 | }, 127 | { 128 | name: "seek current", 129 | offset: 10, 130 | whence: 
io.SeekCurrent, 131 | }, 132 | { 133 | name: "seek end", 134 | offset: -10, 135 | whence: io.SeekEnd, 136 | }, 137 | { 138 | name: "offset beyond end", 139 | offset: 10, 140 | whence: io.SeekEnd, 141 | }, 142 | { 143 | name: "offset before start", 144 | offset: -1, 145 | whence: io.SeekStart, 146 | err: "attempt to seek to a negative offset: -1", 147 | }, 148 | } 149 | 150 | for i, c := range cases { 151 | t.Run(fmt.Sprintf("%s (case %d)", c.name, i), func(t *testing.T) { 152 | data := make([]byte, 10) 153 | offset, seekErr := httpReader.Seek(int64(c.offset), c.whence) 154 | if c.err == "" { 155 | require.NoError(t, seekErr) 156 | return 157 | } 158 | if seekErr != nil { 159 | require.ErrorContains(t, seekErr, c.err) 160 | return 161 | } 162 | 163 | total := 0 164 | for { 165 | read, readErr := httpReader.Read(data[total:]) 166 | total += read 167 | if readErr == io.EOF { 168 | break 169 | } 170 | require.NoError(t, readErr) 171 | } 172 | 173 | expectedOffset, _ := byteReader.Seek(int64(c.offset), c.whence) 174 | assert.Equal(t, expectedOffset, offset) 175 | 176 | expected := make([]byte, len(data)) 177 | expectedTotal := 0 178 | for { 179 | read, err := byteReader.Read(expected[expectedTotal:]) 180 | expectedTotal += read 181 | if err == io.EOF { 182 | break 183 | } 184 | require.NoError(t, err) 185 | } 186 | 187 | assert.Equal(t, expectedTotal, total) 188 | assert.Equal(t, expected[:total], data[:total]) 189 | }) 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /internal/storage/storage.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net/url" 8 | 9 | "gocloud.dev/blob" 10 | ) 11 | 12 | type ReaderAtSeeker interface { 13 | io.Reader 14 | io.ReaderAt 15 | io.Seeker 16 | } 17 | 18 | var ( 19 | _ ReaderAtSeeker = (*HttpReader)(nil) 20 | _ ReaderAtSeeker = (*BlobReader)(nil) 21 | ) 22 | 23 | func 
NewReader(ctx context.Context, resource string) (ReaderAtSeeker, error) { 24 | u, err := url.Parse(resource) 25 | if err != nil { 26 | return nil, fmt.Errorf("failed to parse url: %w", err) 27 | } 28 | if u.Scheme == "http" || u.Scheme == "https" { 29 | return NewHttpReader(resource) 30 | } 31 | 32 | blobSchemes := blob.DefaultURLMux().BucketSchemes() 33 | for _, scheme := range blobSchemes { 34 | if u.Scheme == scheme { 35 | return NewBlobReader(ctx, resource) 36 | } 37 | } 38 | return nil, fmt.Errorf("unable to get storage reader for %q scheme", u.Scheme) 39 | } 40 | -------------------------------------------------------------------------------- /internal/storage/storage_test.go: -------------------------------------------------------------------------------- 1 | package storage_test 2 | 3 | import ( 4 | "context" 5 | "crypto/rand" 6 | "net/http" 7 | "net/http/httptest" 8 | "testing" 9 | 10 | "github.com/planetlabs/gpq/internal/storage" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func randBytes(t *testing.T, size int) []byte { 16 | data := make([]byte, size) 17 | n, err := rand.Read(data) 18 | require.NoError(t, err) 19 | require.Equal(t, n, size) 20 | return data 21 | } 22 | 23 | func TestNewHttpReader(t *testing.T) { 24 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 25 | w.WriteHeader(http.StatusOK) 26 | })) 27 | r, err := storage.NewReader(context.Background(), server.URL) 28 | require.NoError(t, err) 29 | 30 | reader, ok := r.(*storage.HttpReader) 31 | require.True(t, ok) 32 | 33 | assert.NoError(t, reader.Close()) 34 | } 35 | -------------------------------------------------------------------------------- /internal/testdata/cases/example-v0.4.0.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/planetlabs/gpq/5268b57083e0c54c38720564eac8c0afb740db89/internal/testdata/cases/example-v0.4.0.parquet -------------------------------------------------------------------------------- /internal/testdata/cases/example-v1.0.0-beta.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/planetlabs/gpq/5268b57083e0c54c38720564eac8c0afb740db89/internal/testdata/cases/example-v1.0.0-beta.1.parquet -------------------------------------------------------------------------------- /internal/testdata/cases/example-v1.0.0.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/planetlabs/gpq/5268b57083e0c54c38720564eac8c0afb740db89/internal/testdata/cases/example-v1.0.0.parquet -------------------------------------------------------------------------------- /internal/testdata/cases/example-v1.1.0-covering.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/planetlabs/gpq/5268b57083e0c54c38720564eac8c0afb740db89/internal/testdata/cases/example-v1.1.0-covering.parquet -------------------------------------------------------------------------------- /internal/testdata/cases/example-v1.1.0-partitioned.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/planetlabs/gpq/5268b57083e0c54c38720564eac8c0afb740db89/internal/testdata/cases/example-v1.1.0-partitioned.parquet -------------------------------------------------------------------------------- /internal/testdata/cases/example-v1.1.0.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/planetlabs/gpq/5268b57083e0c54c38720564eac8c0afb740db89/internal/testdata/cases/example-v1.1.0.parquet 
-------------------------------------------------------------------------------- /internal/testdata/schema/geoparquet.org/releases/v0.4.0/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "GeoParquet", 4 | "type": "object", 5 | "description": "Parquet metadata included in the geo field.", 6 | "properties": { 7 | "version": { 8 | "type": "string", 9 | "const": "0.4.0", 10 | "description": "The version of the geoparquet metadata standard used when writing." 11 | }, 12 | "primary_column": { 13 | "type": "string", 14 | "description": "The name of the 'primary' geometry column." 15 | }, 16 | "columns": { 17 | "type": "object", 18 | "description": "Metadata about geometry columns, with each key is the name of a geometry column in the table.", 19 | "patternProperties": { 20 | ".*": { 21 | "type": "object", 22 | "properties": { 23 | "encoding": { 24 | "type": "string", 25 | "enum": ["WKB"], 26 | "description": "Name of the geometry encoding format. Currently only 'WKB' is supported." 27 | }, 28 | "geometry_type": { 29 | "oneOf": [ 30 | { 31 | "$ref": "#/$defs/geometry_type" 32 | }, 33 | { 34 | "type": "array", 35 | "items": { 36 | "$ref": "#/$defs/geometry_type" 37 | }, 38 | "uniqueItems": true 39 | } 40 | ], 41 | "description": "The geometry type(s) of all geometries, or 'Unknown' if they are not known." 42 | }, 43 | "crs": { 44 | "oneOf": [ 45 | { 46 | "$ref": "https://proj.org/schemas/v0.4/projjson.schema.json" 47 | }, 48 | { 49 | "type": "null" 50 | } 51 | ], 52 | "description": "JSON object representing the Coordinate Reference System (CRS) of the geometry. If the crs field is not included then the data in this column must be stored in longitude, latitude based on the WGS84 datum, and CRS-aware implementations should assume a default value of OGC:CRS84." 
53 | }, 54 | "edges": { 55 | "type": "string", 56 | "enum": ["planar", "spherical"], 57 | "description": "Name of the coordinate system for the edges. Must be one of 'planar' or 'spherical'. The default value is 'planar'." 58 | }, 59 | "orientation": { 60 | "type": "string", 61 | "enum": ["counterclockwise"], 62 | "description": "Winding order of exterior ring of polygons; interior rings are wound in opposite order. If absent, no assertions are made regarding the winding order." 63 | }, 64 | "bbox": { 65 | "type": "array", 66 | "description": "Bounding Box of the geometries in the file, formatted according to RFC 7946, section 5.", 67 | "items": [ 68 | { 69 | "type": "number", 70 | "description": "The westmost constant longitude line that bounds the rectangle (xmin)." 71 | }, 72 | { 73 | "type": "number", 74 | "description": "The minimum constant latitude line that bounds the rectangle (ymin)." 75 | }, 76 | { 77 | "type": "number", 78 | "description": "The eastmost constant longitude line that bounds the rectangle (xmax)." 79 | }, 80 | { 81 | "type": "number", 82 | "description": "The maximum constant latitude line that bounds the rectangle (ymax)." 83 | } 84 | ] 85 | }, 86 | "epoch": { 87 | "type": "number", 88 | "description": "Coordinate epoch in case of a dynamic CRS, expressed as a decimal year." 
89 | } 90 | }, 91 | "additionalProperties": true, 92 | "required": ["encoding", "geometry_type"] 93 | } 94 | } 95 | } 96 | }, 97 | "additionalProperties": true, 98 | "required": ["version", "primary_column", "columns"], 99 | "$defs": { 100 | "geometry_type": { 101 | "type": "string", 102 | "enum": ["Point", "LineString", "Polygon", "MultiPoint", "MultiLineString", "MultiPolygon", "GeometryCollection", "Unknown"] 103 | } 104 | } 105 | } -------------------------------------------------------------------------------- /internal/testdata/schema/geoparquet.org/releases/v1.0.0-beta.1/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "GeoParquet", 4 | "description": "Parquet metadata included in the geo field.", 5 | "type": "object", 6 | "required": [ 7 | "version", 8 | "primary_column", 9 | "columns" 10 | ], 11 | "properties": { 12 | "version": { 13 | "type": "string", 14 | "const": "1.0.0-beta.1" 15 | }, 16 | "primary_column": { 17 | "type": "string", 18 | "minLength": 1 19 | }, 20 | "columns": { 21 | "type": "object", 22 | "minProperties": 1, 23 | "patternProperties": { 24 | ".+": { 25 | "type": "object", 26 | "required": [ 27 | "encoding", 28 | "geometry_types" 29 | ], 30 | "properties": { 31 | "encoding": { 32 | "type": "string", 33 | "const": "WKB" 34 | }, 35 | "geometry_types": { 36 | "type": "array", 37 | "uniqueItems": true, 38 | "items": { 39 | "type": "string", 40 | "pattern": "^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$" 41 | } 42 | }, 43 | "crs": { 44 | "oneOf": [ 45 | { 46 | "$ref": "https://proj.org/schemas/v0.5/projjson.schema.json" 47 | }, 48 | { 49 | "type": "null" 50 | } 51 | ] 52 | }, 53 | "edges": { 54 | "type": "string", 55 | "enum": [ 56 | "planar", 57 | "spherical" 58 | ] 59 | }, 60 | "orientation": { 61 | "type": "string", 62 | "const": "counterclockwise" 63 | }, 64 | "bbox": { 65 | "type": "array", 66 | 
"items": { 67 | "type": "number" 68 | }, 69 | "oneOf": [ 70 | { 71 | "description": "2D bbox consisting of (xmin, ymin, xmax, ymax)", 72 | "minItems": 4, 73 | "maxItems": 4 74 | }, 75 | { 76 | "description": "3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)", 77 | "minItems": 6, 78 | "maxItems": 6 79 | } 80 | ] 81 | }, 82 | "epoch": { 83 | "type": "number" 84 | } 85 | } 86 | } 87 | }, 88 | "additionalProperties": false 89 | } 90 | } 91 | } -------------------------------------------------------------------------------- /internal/testdata/schema/geoparquet.org/releases/v1.0.0/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "GeoParquet", 4 | "description": "Parquet metadata included in the geo field.", 5 | "type": "object", 6 | "required": ["version", "primary_column", "columns"], 7 | "properties": { 8 | "version": { 9 | "type": "string", 10 | "const": "1.0.0" 11 | }, 12 | "primary_column": { 13 | "type": "string", 14 | "minLength": 1 15 | }, 16 | "columns": { 17 | "type": "object", 18 | "minProperties": 1, 19 | "patternProperties": { 20 | ".+": { 21 | "type": "object", 22 | "required": ["encoding", "geometry_types"], 23 | "properties": { 24 | "encoding": { 25 | "type": "string", 26 | "const": "WKB" 27 | }, 28 | "geometry_types": { 29 | "type": "array", 30 | "uniqueItems": true, 31 | "items": { 32 | "type": "string", 33 | "pattern": "^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$" 34 | } 35 | }, 36 | "crs": { 37 | "oneOf": [ 38 | { 39 | "$ref": "https://proj.org/schemas/v0.5/projjson.schema.json" 40 | }, 41 | { 42 | "type": "null" 43 | } 44 | ] 45 | }, 46 | "edges": { 47 | "type": "string", 48 | "enum": ["planar", "spherical"] 49 | }, 50 | "orientation": { 51 | "type": "string", 52 | "const": "counterclockwise" 53 | }, 54 | "bbox": { 55 | "type": "array", 56 | "items": { 57 | "type": "number" 58 | }, 59 | "oneOf": [ 60 | 
{ 61 | "description": "2D bbox consisting of (xmin, ymin, xmax, ymax)", 62 | "minItems": 4, 63 | "maxItems": 4 64 | }, 65 | { 66 | "description": "3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)", 67 | "minItems": 6, 68 | "maxItems": 6 69 | } 70 | ] 71 | }, 72 | "epoch": { 73 | "type": "number" 74 | } 75 | } 76 | } 77 | }, 78 | "additionalProperties": false 79 | } 80 | } 81 | } -------------------------------------------------------------------------------- /internal/validator/testdata/.gitignore: -------------------------------------------------------------------------------- 1 | actual.json 2 | -------------------------------------------------------------------------------- /internal/validator/testdata/all-pass-meta/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 
51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | } 83 | ], 84 | "metadataOnly": true 85 | } -------------------------------------------------------------------------------- /internal/validator/testdata/all-pass-meta/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [] 9 | } 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "Null Island" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | "coordinates": [0, 0] 23 | } 24 | } 25 | ] 26 | } 27 | } -------------------------------------------------------------------------------- /internal/validator/testdata/all-pass-minimal/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": 
true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | 
}, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": true 102 | } 103 | ], 104 | "metadataOnly": false 105 | } -------------------------------------------------------------------------------- /internal/validator/testdata/all-pass-minimal/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [] 9 | } 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "Null Island" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | "coordinates": [0, 0] 23 | } 24 | } 25 | ] 26 | } 27 | } -------------------------------------------------------------------------------- /internal/validator/testdata/all-pass/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 
| "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": true 102 | } 103 | ], 104 | "metadataOnly": false 105 | } -------------------------------------------------------------------------------- /internal/validator/testdata/all-pass/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [ 9 | "Point" 10 | ], 11 | 
"orientation": "counterclockwise", 12 | "edges": "planar", 13 | "bbox": [ 14 | 0, 15 | 0, 16 | 0, 17 | 0 18 | ], 19 | "epoch": 2021.47, 20 | "crs": { 21 | "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json", 22 | "type": "GeographicCRS", 23 | "name": "WGS 84 longitude-latitude", 24 | "datum": { 25 | "type": "GeodeticReferenceFrame", 26 | "name": "World Geodetic System 1984", 27 | "ellipsoid": { 28 | "name": "WGS 84", 29 | "semi_major_axis": 6378137, 30 | "inverse_flattening": 298.257223563 31 | } 32 | }, 33 | "coordinate_system": { 34 | "subtype": "ellipsoidal", 35 | "axis": [ 36 | { 37 | "name": "Geodetic longitude", 38 | "abbreviation": "Lon", 39 | "direction": "east", 40 | "unit": "degree" 41 | }, 42 | { 43 | "name": "Geodetic latitude", 44 | "abbreviation": "Lat", 45 | "direction": "north", 46 | "unit": "degree" 47 | } 48 | ] 49 | }, 50 | "id": { 51 | "authority": "OGC", 52 | "code": "CRS84" 53 | } 54 | } 55 | } 56 | } 57 | }, 58 | "data": { 59 | "type": "FeatureCollection", 60 | "features": [ 61 | { 62 | "type": "Feature", 63 | "properties": { 64 | "name": "Null Island" 65 | }, 66 | "geometry": { 67 | "type": "Point", 68 | "coordinates": [ 69 | 0, 70 | 0 71 | ] 72 | } 73 | } 74 | ] 75 | } 76 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-bbox-item-type/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": 
true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": false, 31 | "passed": false 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": false, 62 | "message": "expected \"bbox\" for column \"geometry\" to be a list of numbers, got [\"not\",\"a\",\"bounding\",\"box\"]" 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": false, 67 | "passed": false 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": false, 72 | "passed": false 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": false, 77 | "passed": false 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": false, 82 | "passed": false 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": false, 87 | "passed": false 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": false, 92 | "passed": false 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": false, 97 | "passed": false 98 | }, 99 | { 100 | "title": "all geometries must fall within the 
\"bbox\" metadata (if present)", 101 | "run": false, 102 | "passed": false 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-bbox-item-type/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "bbox": ["not", "a", "bounding", "box"] 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Null Island" 20 | }, 21 | "geometry": { 22 | "type": "Point", 23 | "coordinates": [0, 0] 24 | } 25 | } 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-bbox-length/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | 
"passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": false, 62 | "message": "expected \"bbox\" for column \"geometry\" to be a list of 4 or 6 numbers, got [-1,1]" 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": false, 103 | "message": "invalid bbox length for column \"geometry\"" 104 | } 105 | ], 106 | "metadataOnly": false 107 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-bbox-length/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 
3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "bbox": [-1, 1] 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Null Island" 20 | }, 21 | "geometry": { 22 | "type": "Point", 23 | "coordinates": [0, 0] 24 | } 25 | } 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-bbox-type/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": false, 31 | "passed": false 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": 
"optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": false, 62 | "message": "expected \"bbox\" for column \"geometry\" to be a list, got a string: \"bogus\"" 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": false, 67 | "passed": false 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": false, 72 | "passed": false 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": false, 77 | "passed": false 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": false, 82 | "passed": false 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": false, 87 | "passed": false 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": false, 92 | "passed": false 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": false, 97 | "passed": false 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": false, 102 | "passed": false 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-bbox-type/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "bbox": "bogus" 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Null Island" 20 | }, 21 | "geometry": { 22 | "type": "Point", 23 | "coordinates": [0, 0] 24 | } 25 | } 26 | ] 27 | } 28 | } 
-------------------------------------------------------------------------------- /internal/validator/testdata/bad-crs-type/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": false, 31 | "passed": false 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": false, 47 | "message": "expected \"crs\" for column \"geometry\" to be an object, got a string: \"bogus\"" 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": false, 52 | "passed": false 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": false, 57 | "passed": false 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": false, 62 | "passed": false 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": false, 67 | "passed": false 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": false, 72 | "passed": false 73 | }, 74 | { 75 | "title": 
"geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": false, 77 | "passed": false 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": false, 82 | "passed": false 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": false, 87 | "passed": false 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": false, 92 | "passed": false 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": false, 97 | "passed": false 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": false, 102 | "passed": false 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-crs-type/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "crs": "bogus" 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Null Island" 20 | }, 21 | "geometry": { 22 | "type": "Point", 23 | "coordinates": [ 24 | 0, 25 | 0 26 | ] 27 | } 28 | } 29 | ] 30 | } 31 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-crs/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": 
true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": false, 47 | "message": "validation failed against https://proj.org/schemas/v0.6/projjson.schema.json: input is invalid: missing properties: 'source_crs', 'target_crs', 'transformation'" 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": true, 52 | "passed": true 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | 
{ 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-crs/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "crs": { 10 | "type": "BogusCRS" 11 | } 12 | } 13 | } 14 | }, 15 | "data": { 16 | "type": "FeatureCollection", 17 | "features": [ 18 | { 19 | "type": "Feature", 20 | "properties": { 21 | "name": "Null Island" 22 | }, 23 | "geometry": { 24 | "type": "Point", 25 | "coordinates": [ 26 | 0, 27 | 0 28 | ] 29 | } 30 | } 31 | ] 32 | } 33 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-edges/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 
| { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": false, 57 | "message": "unsupported edges \"bogus\" for column \"geometry\", expected \"planar\" or \"spherical\"" 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 
| } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-edges/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "edges": "bogus" 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Null Island" 20 | }, 21 | "geometry": { 22 | "type": "Point", 23 | "coordinates": [0, 0] 24 | } 25 | } 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-encoding/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": false, 37 | "message": "unsupported encoding \"bogus\" for column \"geometry\"" 38 | }, 39 | { 40 | "title": "column metadata must include a \"geometry_types\" list", 41 | "run": true, 42 | "passed": true 43 | }, 44 | { 45 | 
"title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": true, 47 | "passed": true 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": true, 52 | "passed": true 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": false, 88 | "message": "invalid geometry in column \"geometry\": unsupported encoding: bogus" 89 | }, 90 | { 91 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 92 | "run": false, 93 | "passed": false 94 | }, 95 | { 96 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 97 | "run": false, 98 | "passed": false 99 | }, 100 | { 101 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 102 | "run": false, 103 | "passed": false 104 | } 105 | ], 106 | "metadataOnly": false 107 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-encoding/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | 
"encoding": "bogus", 8 | "geometry_types": [] 9 | } 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "Null Island" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | "coordinates": [0, 0] 23 | } 24 | } 25 | ] 26 | } 27 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-epoch/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": false, 31 | "passed": false 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional 
\"epoch\" must be a number", 65 | "run": true, 66 | "passed": false, 67 | "message": "expected \"epoch\" for column \"geometry\" to be a number, got a string: \"bogus\"" 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": false, 72 | "passed": false 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": false, 77 | "passed": false 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": false, 82 | "passed": false 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": false, 87 | "passed": false 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": false, 92 | "passed": false 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": false, 97 | "passed": false 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": false, 102 | "passed": false 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-epoch/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "epoch": "bogus" 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Null Island" 20 | }, 21 | "geometry": { 22 | "type": "Point", 23 | "coordinates": [0, 0] 24 | } 25 | } 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- 
/internal/validator/testdata/bad-geometry-types/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": false, 42 | "message": "unsupported geometry type \"bogus\" for column \"geometry\"" 43 | }, 44 | { 45 | "title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": true, 47 | "passed": true 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": true, 52 | "passed": true 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 
79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": false, 93 | "message": "unexpected geometry type \"Point\" for column \"geometry\"" 94 | }, 95 | { 96 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 97 | "run": true, 98 | "passed": true 99 | }, 100 | { 101 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 102 | "run": true, 103 | "passed": true 104 | } 105 | ], 106 | "metadataOnly": false 107 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-geometry-types/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": ["bogus"] 9 | } 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "Null Island" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | "coordinates": [0, 0] 23 | } 24 | } 25 | ] 26 | } 27 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-metadata-type/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": false, 12 | "message": "failed to parse file metadata as a 
JSON object" 13 | }, 14 | { 15 | "title": "metadata must include a \"version\" string", 16 | "run": false, 17 | "passed": false 18 | }, 19 | { 20 | "title": "metadata must include a \"primary_column\" string", 21 | "run": false, 22 | "passed": false 23 | }, 24 | { 25 | "title": "metadata must include a \"columns\" object", 26 | "run": false, 27 | "passed": false 28 | }, 29 | { 30 | "title": "column metadata must include the \"primary_column\" name", 31 | "run": false, 32 | "passed": false 33 | }, 34 | { 35 | "title": "column metadata must include a valid \"encoding\" string", 36 | "run": false, 37 | "passed": false 38 | }, 39 | { 40 | "title": "column metadata must include a \"geometry_types\" list", 41 | "run": false, 42 | "passed": false 43 | }, 44 | { 45 | "title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": false, 47 | "passed": false 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": false, 52 | "passed": false 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": false, 57 | "passed": false 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": false, 62 | "passed": false 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": false, 67 | "passed": false 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": false, 72 | "passed": false 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": false, 77 | "passed": false 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": false, 82 | "passed": false 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": false, 87 | "passed": false 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": false, 92 | "passed": 
false 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": false, 97 | "passed": false 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": false, 102 | "passed": false 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-metadata-type/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": "bad metadata", 3 | "data": { 4 | "type": "FeatureCollection", 5 | "features": [ 6 | { 7 | "type": "Feature", 8 | "properties": { 9 | "name": "Null Island" 10 | }, 11 | "geometry": { 12 | "type": "Point", 13 | "coordinates": [0, 0] 14 | } 15 | } 16 | ] 17 | } 18 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-orientation/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": 
true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": false, 52 | "message": "unsupported orientation \"bogus\" for column \"geometry\", expected \"counterclockwise\"" 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": false, 98 | "message": "unsupported orientation \"bogus\" for column \"geometry\"" 99 | }, 100 | { 101 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 102 | "run": true, 103 | "passed": true 104 | } 105 | ], 106 | "metadataOnly": false 107 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-orientation/input.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "orientation": "bogus" 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Null Island" 20 | }, 21 | "geometry": { 22 | "type": "Point", 23 | "coordinates": [0, 0] 24 | } 25 | } 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-primary-column/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": false, 32 | "message": "the \"bogus\" column is not included in the column metadata" 33 | }, 34 | { 35 | "title": "column metadata must include a valid \"encoding\" string", 36 | "run": true, 37 | "passed": true 38 | }, 39 | { 40 | "title": "column metadata must include a \"geometry_types\" list", 41 | "run": true, 42 | "passed": true 43 | }, 44 | { 45 | "title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": true, 47 | "passed": true 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a 
valid string", 51 | "run": true, 52 | "passed": true 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/bad-primary-column/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "bogus", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [] 9 | } 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "Null Island" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | 
"coordinates": [0, 0] 23 | } 24 | } 25 | ] 26 | } 27 | } -------------------------------------------------------------------------------- /internal/validator/testdata/complex-types/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet 
type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": true 102 | } 103 | ], 104 | "metadataOnly": false 105 | } -------------------------------------------------------------------------------- /internal/validator/testdata/complex-types/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [ 9 | "Point" 10 | ], 11 | "orientation": "counterclockwise", 12 | "edges": "planar", 13 | "bbox": [ 14 | 0, 15 | 0, 16 | 0, 17 | 0 18 | ], 19 | "epoch": 2021.47, 20 | "crs": { 21 | "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json", 22 | "type": "GeographicCRS", 23 | "name": "WGS 84 longitude-latitude", 24 | "datum": { 25 | "type": "GeodeticReferenceFrame", 26 | "name": "World Geodetic System 1984", 27 | "ellipsoid": { 28 | "name": "WGS 84", 29 | "semi_major_axis": 6378137, 30 | "inverse_flattening": 298.257223563 31 | } 32 | }, 33 | "coordinate_system": { 34 | "subtype": "ellipsoidal", 35 | "axis": [ 36 | { 37 | "name": "Geodetic longitude", 38 | "abbreviation": "Lon", 39 | "direction": "east", 40 | "unit": "degree" 41 | }, 42 | { 43 | "name": "Geodetic latitude", 44 | "abbreviation": "Lat", 45 | 
"direction": "north", 46 | "unit": "degree" 47 | } 48 | ] 49 | }, 50 | "id": { 51 | "authority": "OGC", 52 | "code": "CRS84" 53 | } 54 | } 55 | } 56 | } 57 | }, 58 | "data": { 59 | "type": "FeatureCollection", 60 | "features": [ 61 | { 62 | "type": "Feature", 63 | "properties": { 64 | "numbers": [2, 4, 6, 8], 65 | "strings": ["chicken", "soup"], 66 | "object": { 67 | "name": "Bob" 68 | }, 69 | "names": { 70 | "common": [ 71 | {"value": "Hello", "language": "en"} 72 | ] 73 | } 74 | }, 75 | "geometry": { 76 | "type": "Point", 77 | "coordinates": [ 78 | 0, 79 | 0 80 | ] 81 | } 82 | } 83 | ] 84 | } 85 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-correctly-oriented/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": 
true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": true 102 | } 103 | ], 104 | "metadataOnly": false 105 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-correctly-oriented/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "orientation": "counterclockwise" 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Right Hand Rule" 20 | }, 21 | "geometry": { 
22 | "type": "Polygon", 23 | "coordinates": [ 24 | [[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]], 25 | [[-5, -5], [-5, 5], [5, 5], [5, -5], [-5, -5]] 26 | ] 27 | } 28 | } 29 | ] 30 | } 31 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-incorrectly-oriented/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": 
"geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": false, 97 | "message": "invalid orientation for exterior ring in column \"geometry\"" 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-incorrectly-oriented/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "orientation": "counterclockwise" 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": { 19 | "name": "Right Hand Rule" 20 | }, 21 | "geometry": { 22 | "type": "Polygon", 23 | "coordinates": [ 24 | [[-10, -10], [-10, 10], [10, 10], [10, -10], [-10, -10]], 25 | [[-5, -5], [-5, 5], [5, 5], [5, -5], [-5, -5]] 26 | ] 27 | } 28 | } 29 | ] 30 | } 31 | } -------------------------------------------------------------------------------- 
/internal/validator/testdata/geometry-inside-antimeridian-spanning-bbox/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required 
or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": true 102 | } 103 | ], 104 | "metadataOnly": false 105 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-inside-antimeridian-spanning-bbox/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "bbox": [170, -10, -170, 10] 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": {}, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [175, 0] 22 | } 23 | }, 24 | { 25 | "type": "Feature", 26 | "properties": {}, 27 | "geometry": { 28 | "type": "Point", 29 | "coordinates": [-175, 0] 30 | } 31 | } 32 | ] 33 | } 34 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-outside-antimeridian-spanning-bbox/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | 
}, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon 
geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": false, 102 | "message": "geometry in column \"geometry\" extends to -155.000000, outside of the bbox" 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-outside-antimeridian-spanning-bbox/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "bbox": [170, -10, -170, 10] 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": {}, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [175, 0] 22 | } 23 | }, 24 | { 25 | "type": "Feature", 26 | "properties": {}, 27 | "geometry": { 28 | "type": "Point", 29 | "coordinates": [-155, 0] 30 | } 31 | } 32 | ] 33 | } 34 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-outside-bbox/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | 
"run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": false, 102 | "message": "geometry in column \"geometry\" extends to 
20.000000, east of the bbox" 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-outside-bbox/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": [], 9 | "bbox": [-10, -10, 10, 10] 10 | } 11 | } 12 | }, 13 | "data": { 14 | "type": "FeatureCollection", 15 | "features": [ 16 | { 17 | "type": "Feature", 18 | "properties": {}, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [20, 0] 22 | } 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-type-not-in-list/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a 
PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": false, 92 | "message": "unexpected geometry type \"Point\" for column \"geometry\"" 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/geometry-type-not-in-list/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": ["Polygon"] 9 | 
} 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "Null Island" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | "coordinates": [0, 0] 23 | } 24 | } 25 | ] 26 | } 27 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-columns/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": false, 27 | "message": "missing \"columns\" in metadata" 28 | }, 29 | { 30 | "title": "column metadata must include the \"primary_column\" name", 31 | "run": false, 32 | "passed": false 33 | }, 34 | { 35 | "title": "column metadata must include a valid \"encoding\" string", 36 | "run": false, 37 | "passed": false 38 | }, 39 | { 40 | "title": "column metadata must include a \"geometry_types\" list", 41 | "run": false, 42 | "passed": false 43 | }, 44 | { 45 | "title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": false, 47 | "passed": false 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": false, 52 | "passed": false 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": false, 57 | "passed": false 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": false, 62 | "passed": false 63 | }, 64 | { 65 
| "title": "optional \"epoch\" must be a number", 66 | "run": false, 67 | "passed": false 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": false, 72 | "passed": false 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": false, 77 | "passed": false 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": false, 82 | "passed": false 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": false, 87 | "passed": false 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": false, 92 | "passed": false 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": false, 97 | "passed": false 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": false, 102 | "passed": false 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-columns/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry" 5 | }, 6 | "data": { 7 | "type": "FeatureCollection", 8 | "features": [ 9 | { 10 | "type": "Feature", 11 | "properties": { 12 | "name": "Null Island" 13 | }, 14 | "geometry": { 15 | "type": "Point", 16 | "coordinates": [0, 0] 17 | } 18 | } 19 | ] 20 | } 21 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-encoding/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | 
"passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": false, 37 | "message": "missing \"encoding\" for column \"geometry\"" 38 | }, 39 | { 40 | "title": "column metadata must include a \"geometry_types\" list", 41 | "run": true, 42 | "passed": true 43 | }, 44 | { 45 | "title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": true, 47 | "passed": true 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": true, 52 | "passed": true 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | 
"title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-encoding/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "geometry_types": [] 8 | } 9 | } 10 | }, 11 | "data": { 12 | "type": "FeatureCollection", 13 | "features": [ 14 | { 15 | "type": "Feature", 16 | "properties": { 17 | "name": "Null Island" 18 | }, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [0, 0] 22 | } 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-geometry-types/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" 
name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": false, 42 | "message": "missing \"geometry_types\" for column \"geometry\"" 43 | }, 44 | { 45 | "title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": true, 47 | "passed": true 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": true, 52 | "passed": true 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | "title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 | } 104 | ], 105 | "metadataOnly": false 106 | } 
-------------------------------------------------------------------------------- /internal/validator/testdata/missing-geometry-types/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB" 8 | } 9 | } 10 | }, 11 | "data": { 12 | "type": "FeatureCollection", 13 | "features": [ 14 | { 15 | "type": "Feature", 16 | "properties": { 17 | "name": "Null Island" 18 | }, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [0, 0] 22 | } 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-primary-column/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": false, 22 | "message": "missing \"primary_column\" in metadata" 23 | }, 24 | { 25 | "title": "metadata must include a \"columns\" object", 26 | "run": true, 27 | "passed": true 28 | }, 29 | { 30 | "title": "column metadata must include the \"primary_column\" name", 31 | "run": true, 32 | "passed": false, 33 | "message": "the \"\" column is not included in the column metadata" 34 | }, 35 | { 36 | "title": "column metadata must include a valid \"encoding\" string", 37 | "run": true, 38 | "passed": true 39 | }, 40 | { 41 | "title": "column metadata must include a \"geometry_types\" list", 42 | "run": true, 43 | "passed": true 44 | }, 45 | { 46 | "title": "optional 
\"crs\" must be null or a PROJJSON object", 47 | "run": true, 48 | "passed": true 49 | }, 50 | { 51 | "title": "optional \"orientation\" must be a valid string", 52 | "run": true, 53 | "passed": true 54 | }, 55 | { 56 | "title": "optional \"edges\" must be a valid string", 57 | "run": true, 58 | "passed": true 59 | }, 60 | { 61 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 62 | "run": true, 63 | "passed": true 64 | }, 65 | { 66 | "title": "optional \"epoch\" must be a number", 67 | "run": true, 68 | "passed": true 69 | }, 70 | { 71 | "title": "geometry columns must not be grouped", 72 | "run": true, 73 | "passed": true 74 | }, 75 | { 76 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 77 | "run": true, 78 | "passed": true 79 | }, 80 | { 81 | "title": "geometry columns must be required or optional, not repeated", 82 | "run": true, 83 | "passed": true 84 | }, 85 | { 86 | "title": "all geometry values match the \"encoding\" metadata", 87 | "run": true, 88 | "passed": true 89 | }, 90 | { 91 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 92 | "run": true, 93 | "passed": true 94 | }, 95 | { 96 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 97 | "run": true, 98 | "passed": true 99 | }, 100 | { 101 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 102 | "run": true, 103 | "passed": true 104 | } 105 | ], 106 | "metadataOnly": false 107 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-primary-column/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0-beta.1", 4 | "columns": { 5 | "geometry": { 6 | "encoding": "WKB", 7 | "geometry_types": [] 8 | } 9 | } 10 | }, 11 | "data": { 12 | "type": "FeatureCollection", 13 | "features": [ 14 | { 15 | 
"type": "Feature", 16 | "properties": { 17 | "name": "Null Island" 18 | }, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [0, 0] 22 | } 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-version/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": false, 17 | "message": "missing \"version\" in metadata" 18 | }, 19 | { 20 | "title": "metadata must include a \"primary_column\" string", 21 | "run": true, 22 | "passed": true 23 | }, 24 | { 25 | "title": "metadata must include a \"columns\" object", 26 | "run": true, 27 | "passed": true 28 | }, 29 | { 30 | "title": "column metadata must include the \"primary_column\" name", 31 | "run": true, 32 | "passed": true 33 | }, 34 | { 35 | "title": "column metadata must include a valid \"encoding\" string", 36 | "run": true, 37 | "passed": true 38 | }, 39 | { 40 | "title": "column metadata must include a \"geometry_types\" list", 41 | "run": true, 42 | "passed": true 43 | }, 44 | { 45 | "title": "optional \"crs\" must be null or a PROJJSON object", 46 | "run": true, 47 | "passed": true 48 | }, 49 | { 50 | "title": "optional \"orientation\" must be a valid string", 51 | "run": true, 52 | "passed": true 53 | }, 54 | { 55 | "title": "optional \"edges\" must be a valid string", 56 | "run": true, 57 | "passed": true 58 | }, 59 | { 60 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 61 | "run": true, 62 | "passed": true 63 | }, 64 | { 65 | "title": "optional \"epoch\" must be a number", 66 | "run": true, 67 | "passed": true 68 | }, 69 | { 70 | 
"title": "geometry columns must not be grouped", 71 | "run": true, 72 | "passed": true 73 | }, 74 | { 75 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 76 | "run": true, 77 | "passed": true 78 | }, 79 | { 80 | "title": "geometry columns must be required or optional, not repeated", 81 | "run": true, 82 | "passed": true 83 | }, 84 | { 85 | "title": "all geometry values match the \"encoding\" metadata", 86 | "run": true, 87 | "passed": true 88 | }, 89 | { 90 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 91 | "run": true, 92 | "passed": true 93 | }, 94 | { 95 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 96 | "run": true, 97 | "passed": true 98 | }, 99 | { 100 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 101 | "run": true, 102 | "passed": true 103 | } 104 | ], 105 | "metadataOnly": false 106 | } -------------------------------------------------------------------------------- /internal/validator/testdata/missing-version/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "primary_column": "geometry", 4 | "columns": { 5 | "geometry": { 6 | "encoding": "WKB", 7 | "geometry_types": [] 8 | } 9 | } 10 | }, 11 | "data": { 12 | "type": "FeatureCollection", 13 | "features": [ 14 | { 15 | "type": "Feature", 16 | "properties": { 17 | "name": "Null Island" 18 | }, 19 | "geometry": { 20 | "type": "Point", 21 | "coordinates": [0, 0] 22 | } 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /internal/validator/testdata/with-empty-geometry/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be 
a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 | { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | 
"passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": true 102 | } 103 | ], 104 | "metadataOnly": false 105 | } -------------------------------------------------------------------------------- /internal/validator/testdata/with-empty-geometry/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": ["Point"] 9 | } 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "with geometry" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | "coordinates": [0, 0] 23 | } 24 | }, 25 | { 26 | "type": "Feature", 27 | "properties": { 28 | "name": "empty geometry" 29 | }, 30 | "geometry": { 31 | "type": "Point", 32 | "coordinates": [] 33 | } 34 | } 35 | ] 36 | } 37 | } -------------------------------------------------------------------------------- /internal/validator/testdata/with-null-geometry/expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "checks": [ 3 | { 4 | "title": "file must include a \"geo\" metadata key", 5 | "run": true, 6 | "passed": true 7 | }, 8 | { 9 | "title": "metadata must be a JSON object", 10 | "run": true, 11 | "passed": true 12 | }, 13 | { 14 | "title": "metadata must include a \"version\" string", 15 | "run": true, 16 | "passed": true 17 | }, 18 | { 19 | "title": "metadata must include a \"primary_column\" string", 20 | "run": true, 21 | "passed": true 22 | }, 23 | { 24 | "title": "metadata must include a \"columns\" object", 25 | "run": true, 26 | "passed": true 27 | }, 28 
| { 29 | "title": "column metadata must include the \"primary_column\" name", 30 | "run": true, 31 | "passed": true 32 | }, 33 | { 34 | "title": "column metadata must include a valid \"encoding\" string", 35 | "run": true, 36 | "passed": true 37 | }, 38 | { 39 | "title": "column metadata must include a \"geometry_types\" list", 40 | "run": true, 41 | "passed": true 42 | }, 43 | { 44 | "title": "optional \"crs\" must be null or a PROJJSON object", 45 | "run": true, 46 | "passed": true 47 | }, 48 | { 49 | "title": "optional \"orientation\" must be a valid string", 50 | "run": true, 51 | "passed": true 52 | }, 53 | { 54 | "title": "optional \"edges\" must be a valid string", 55 | "run": true, 56 | "passed": true 57 | }, 58 | { 59 | "title": "optional \"bbox\" must be an array of 4 or 6 numbers", 60 | "run": true, 61 | "passed": true 62 | }, 63 | { 64 | "title": "optional \"epoch\" must be a number", 65 | "run": true, 66 | "passed": true 67 | }, 68 | { 69 | "title": "geometry columns must not be grouped", 70 | "run": true, 71 | "passed": true 72 | }, 73 | { 74 | "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", 75 | "run": true, 76 | "passed": true 77 | }, 78 | { 79 | "title": "geometry columns must be required or optional, not repeated", 80 | "run": true, 81 | "passed": true 82 | }, 83 | { 84 | "title": "all geometry values match the \"encoding\" metadata", 85 | "run": true, 86 | "passed": true 87 | }, 88 | { 89 | "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", 90 | "run": true, 91 | "passed": true 92 | }, 93 | { 94 | "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", 95 | "run": true, 96 | "passed": true 97 | }, 98 | { 99 | "title": "all geometries must fall within the \"bbox\" metadata (if present)", 100 | "run": true, 101 | "passed": true 102 | } 103 | ], 104 | "metadataOnly": false 105 | } 
-------------------------------------------------------------------------------- /internal/validator/testdata/with-null-geometry/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "version": "1.0.0", 4 | "primary_column": "geometry", 5 | "columns": { 6 | "geometry": { 7 | "encoding": "WKB", 8 | "geometry_types": ["Point"] 9 | } 10 | } 11 | }, 12 | "data": { 13 | "type": "FeatureCollection", 14 | "features": [ 15 | { 16 | "type": "Feature", 17 | "properties": { 18 | "name": "with geometry" 19 | }, 20 | "geometry": { 21 | "type": "Point", 22 | "coordinates": [0, 0] 23 | } 24 | }, 25 | { 26 | "type": "Feature", 27 | "properties": { 28 | "name": "without geometry" 29 | }, 30 | "geometry": null 31 | } 32 | ] 33 | } 34 | } -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # gpq 2 | 3 | A utility for working with [GeoParquet](https://github.com/opengeospatial/geoparquet). 4 | 5 | ## Installation 6 | 7 | The `gpq` program can be installed by downloading one of the archives from [the latest release](https://github.com/planetlabs/gpq/releases). 8 | 9 | Extract the archive and place the `gpq` executable somewhere on your path. See a list of available commands by running `gpq` in your terminal. 10 | 11 | For Homebrew users, you can install `gpq` from the [Planet tap](https://github.com/planetlabs/homebrew-tap): 12 | 13 | ```shell 14 | # run `brew update` first if you have used this tap previously and want the latest formula 15 | brew install planetlabs/tap/gpq 16 | ``` 17 | 18 | ## WebAssembly 19 | 20 | In addition to the CLI program, the `gpq` utility is built as a WebAssembly binary. The WASM build can be downloaded from [the latest release](https://github.com/planetlabs/gpq/releases). 
21 | 22 | To give it a try without downloading or installing anything, see https://planetlabs.github.io/gpq/. 23 | 24 | ## Command Line Utility 25 | 26 | The `gpq` program can be used to validate GeoParquet files and to convert to and from GeoJSON. 27 | 28 | ```shell 29 | # see the available commands 30 | gpq --help 31 | ``` 32 | 33 | ### validate 34 | 35 | The `validate` command generates a validation report for a GeoParquet file. 36 | 37 | ```shell 38 | gpq validate example.parquet 39 | ``` 40 | 41 | By default, the command writes out a text report with a list of status checks. The command exits with status code 1 if one or more of the checks does not pass. 42 | 43 | The validation includes scanning the data to ensure that values in geometry columns conform with the specification (making assertions about the encoding, ring orientation, bounding box, and alignment with other metadata). For very large GeoParquet files, the rules that scan the geometry data can be skipped with the `--metadata-only` argument. With this argument, the command only runs rules related to the file metadata and Parquet schema. 44 | 45 | To generate a JSON report instead of the text report, use the `--format json` argument. 46 | 47 | See `gpq validate --help` for the full list of options. 48 | 49 | ### convert 50 | 51 | The `convert` command can convert a GeoJSON file to GeoParquet or a GeoParquet file to GeoJSON. 52 | 53 | ```shell 54 | # read geojson and write geoparquet 55 | gpq convert example.geojson example.parquet 56 | ``` 57 | 58 | ```shell 59 | # read geoparquet and write geojson 60 | gpq convert example.parquet example.geojson 61 | ``` 62 | 63 | The `convert` command can also be used to convert an input Parquet file without "geo" metadata to a valid GeoParquet file. 
64 | 65 | ```shell 66 | # read parquet and write geoparquet 67 | gpq convert non-geo.parquet valid-geo.parquet 68 | ``` 69 | 70 | When reading from a Parquet file and writing out GeoParquet, the input geometry values can be WKB or WKT encoded. The output geometry values will always be WKB encoded. 71 | 72 | The `--input-primary-column` argument can be used to provide a primary geometry column name when reading Parquet files without "geo" metadata (defaults to `geometry`). 73 | 74 | The `--compression` argument can be used to control the compression codec used when writing GeoParquet. See `gpq convert --help` for the available options. 75 | 76 | 77 | ### describe 78 | 79 | The `describe` command prints schema information and metadata about a GeoParquet file. 80 | 81 | ```shell 82 | gpq describe example.parquet 83 | ``` 84 | 85 | ### extract 86 | 87 | The `extract` command can be used to extract columns and/or rows from a local or remote GeoParquet file. 88 | 89 | ``` 90 | gpq extract input.parquet output.parquet --bbox=xmin,ymin,xmax,ymax --drop-cols=col1,col2 91 | ``` 92 | 93 | Instead of negatively selecting columns by specifying which ones to drop (`--drop-cols`), you can alternatively use the `--keep-only-cols` argument to explicitly select those columns that you wish to keep in the data set. 94 | 95 | The `--bbox` argument allows you to extract features whose bounding box intersects with the provided bbox. Note that this doesn't support exact geometry filtering and will only operate on bounding boxes of full feature geometries. It is thus recommended to use the `--bbox` argument for preliminary filtering only. The algorithm will attempt to use an existing bounding box column in the file. If bounding box information is not available, the bounding boxes will be computed on the fly. If the GeoParquet file is spatially partitioned using row groups, the algorithm will use row group statistics to speed up the filtering process. 
96 | 97 | ## Limitations 98 | 99 | * Non-geographic CRS information is not preserved when converting GeoParquet to GeoJSON. 100 | * Page and row group size is not configurable when writing GeoParquet. This may change soon. 101 | * Feature identifiers in GeoJSON are not written to GeoParquet columns. This may change soon. 102 | -------------------------------------------------------------------------------- /web/.gitignore: -------------------------------------------------------------------------------- 1 | *.wasm 2 | wasm_exec.js 3 | /node_modules/ -------------------------------------------------------------------------------- /web/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | GPQ - GeoParquet Utility 9 | 10 | 46 | 47 | 48 | 49 |
50 |

51 | GeoParquet <-> GeoJSON 52 |

53 |

54 | Convert GeoJSON 55 | to GeoParquet 56 | and vice versa without leaving your browser. 57 |

58 |
59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gpq", 3 | "private": true, 4 | "scripts": { 5 | "test": "eslint main.js" 6 | }, 7 | "devDependencies": { 8 | "eslint": "^8.40.0", 9 | "eslint-config-planet": "^22.1.0", 10 | "vite": "^6.0.1" 11 | }, 12 | "eslintConfig": { 13 | "extends": "planet", 14 | "parserOptions": { 15 | "ecmaVersion": "latest" 16 | }, 17 | "ignorePatterns": [ 18 | "wasm_exec.js" 19 | ], 20 | "globals": { 21 | "Go": "readonly" 22 | }, 23 | "rules": { 24 | "import/no-unresolved": [ 25 | "error", 26 | { 27 | "ignore": [ 28 | "^https?://" 29 | ] 30 | } 31 | ] 32 | } 33 | } 34 | } 35 | --------------------------------------------------------------------------------