├── .README.md2
├── LICENSE.md5
├── snap.login.enc
├── Dockerfile
├── internal
│   ├── cli
│   │   ├── export_test.go
│   │   ├── common_test.go
│   │   ├── generate_test.go
│   │   ├── generate.go
│   │   ├── list.go
│   │   ├── list_test.go
│   │   ├── root.go
│   │   ├── root_test.go
│   │   ├── download.go
│   │   └── download_test.go
│   ├── element
│   │   ├── element.go
│   │   └── element_test.go
│   ├── generator
│   │   ├── importer
│   │   │   └── geofabrik
│   │   │       ├── geofabrik_test.go
│   │   │       └── geofabrik.go
│   │   ├── generator.go
│   │   └── generator_test.go
│   ├── lists
│   │   ├── lists_test.go
│   │   └── lists.go
│   ├── downloader
│   │   ├── hash.go
│   │   ├── hash_test.go
│   │   ├── download_test.go
│   │   └── download.go
│   ├── scrapper
│   │   ├── bbbike
│   │   │   └── bbbike.go
│   │   ├── scrapper.go
│   │   ├── geofabrik
│   │   │   └── geofabrik.go
│   │   ├── geo2day
│   │   │   ├── geo2day.go
│   │   │   └── geo2day_test.go
│   │   └── openstreetmapfr
│   │       └── openstreetmapfr.go
│   └── config
│       ├── config_test.go
│       └── config.go
├── cmd
│   └── download-geofabrik
│       ├── main.go
│       └── main_test.go
├── .github
│   ├── workflows
│   │   ├── rebase.yml
│   │   ├── gotest.yml
│   │   ├── yml_auto-pr.yml
│   │   ├── push.yml
│   │   ├── automerge-dependabot.yml
│   │   ├── goreleaser.yml
│   │   ├── golangci-lint.yml
│   │   ├── genchangelog.yml
│   │   ├── genyml.yml
│   │   ├── codeql-analysis.yml
│   │   └── coverage.yml
│   └── dependabot.yml
├── .renovaterc.json
├── .vscode
│   └── launch.json
├── .chglog
│   ├── config.yml
│   └── CHANGELOG.tpl.md
├── .pre-commit
├── .gitignore
├── .README.md1
├── go.mod
├── .goreleaser.yaml
├── pkg
│   └── formats
│       ├── formats.go
│       └── formats_test.go
├── .golangci.yml
├── LICENSE
└── go.sum
/.README.md2:
--------------------------------------------------------------------------------
1 | ```
2 |
3 | ## List of elements
4 |
--------------------------------------------------------------------------------
/LICENSE.md5:
--------------------------------------------------------------------------------
1 | 65d26fcc2f35ea6a181ac777e42db1ea ../../LICENSE
--------------------------------------------------------------------------------
/snap.login.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/julien-noblet/download-geofabrik/HEAD/snap.login.enc
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM scratch
2 | ENTRYPOINT ["/download-geofabrik"]
3 | COPY download-geofabrik /
4 | COPY geofabrik.yml /
5 | COPY bbbike.yml /
6 | COPY openstreetmap.fr.yml /
7 | COPY geo2day.yml /
--------------------------------------------------------------------------------
/internal/cli/export_test.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | // RootCmd is exported for testing purposes only.
4 | var RootCmd = rootCmd
5 |
6 | func ResetGlobs() {
7 | cfgFile = ""
8 | service = ""
9 | }
10 |
--------------------------------------------------------------------------------
/cmd/download-geofabrik/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 |
6 | "github.com/julien-noblet/download-geofabrik/internal/cli"
7 | )
8 |
9 | func main() {
10 | if err := cli.Execute(); err != nil {
11 | os.Exit(1)
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/internal/cli/common_test.go:
--------------------------------------------------------------------------------
1 | package cli_test
2 |
3 | const testConfigContent = `
4 | elements:
5 | test-elem:
6 | id: "test-elem"
7 | files: ["osm.pbf"]
8 | formats:
9 | osm.pbf:
10 | ext: "osm.pbf"
11 | loc: "osm.pbf"
12 | `
13 |
--------------------------------------------------------------------------------
/.github/workflows/rebase.yml:
--------------------------------------------------------------------------------
1 | on:
2 | issue_comment:
3 | types: [created]
4 | name: Automatic Rebase
5 | jobs:
6 | rebase:
7 | name: Rebase
8 | if: contains(github.event.comment.body, '/rebase')
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v6
12 | - name: Automatic Rebase
13 | uses: cirrus-actions/rebase@master
14 | env:
15 | GITHUB_TOKEN: ${{ secrets.TOKEN }}
16 |
--------------------------------------------------------------------------------
/.github/workflows/gotest.yml:
--------------------------------------------------------------------------------
1 | name: gotest
2 | on:
3 | pull_request:
4 | push:
5 | branches:
6 | - main
7 | jobs:
8 | gotest:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v6
12 | with:
13 | ref: ${{ github.head_ref }}
14 | - uses: actions/setup-go@v6
15 | with:
16 | go-version: stable
17 | check-latest: true
18 | cache: true
19 | - run: go test -v ./...
--------------------------------------------------------------------------------
/.renovaterc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "config:recommended"
4 | ],
5 | "automerge": true,
6 | "major": {
7 | "automerge": false
8 | },
9 | "packageRules": [
10 | {
11 | "groupName": "golang.org/x",
12 | "matchPackageNames": [
13 | "/golang.org/x/"
14 | ]
15 | },
16 | {
17 | "matchDepTypes": [
18 | "devDependencies"
19 | ],
20 | "automerge": true
21 | }
22 | ],
23 | "rebaseWhen": "behind-base-branch"
24 | }
25 |
--------------------------------------------------------------------------------
/.github/workflows/yml_auto-pr.yml:
--------------------------------------------------------------------------------
1 | name: PR for release branch
2 | on:
3 | push:
4 | branches:
5 | - new_yamls
6 | jobs:
7 | release_pull_request:
8 | runs-on: ubuntu-latest
9 | name: release_pull_request
10 | steps:
11 | - name: checkout
12 | uses: actions/checkout@v6
13 | - name: Create PR to branch
14 | uses: gorillio/github-action-cherry-pick@master
15 | with:
16 | pr_branch: 'master'
17 | env:
18 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
19 | GITBOT_EMAIL: action@github.com
20 | DRY_RUN: false
--------------------------------------------------------------------------------
/internal/cli/generate_test.go:
--------------------------------------------------------------------------------
1 | package cli_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/julien-noblet/download-geofabrik/internal/cli"
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestGenerateCmd_UnknownService(t *testing.T) {
11 | // Reset global flag state first: Execute() reuses the package-level
12 | // flags (cfgFile, service) between invocations.
13 | cli.ResetGlobs()
14 |
15 | cli.RootCmd.SetArgs([]string{"generate", "--service", "unknown-service"})
16 |
17 | err := cli.Execute()
18 | assert.Error(t, err)
19 | // The error from runGenerate is wrapped as "generation failed: ...";
20 | // asserting only that an error occurred keeps the test message-agnostic.
21 | }
22 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Utilisez IntelliSense pour en savoir plus sur les attributs possibles.
3 | // Pointez pour afficher la description des attributs existants.
4 | // Pour plus d'informations, visitez : https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Launch",
9 | "type": "go",
10 | "request": "launch",
11 | "mode": "auto",
12 | "program": "${fileDirname}",
13 | "env": {},
14 | "args": []
15 | }
16 | ]
17 | }
--------------------------------------------------------------------------------
/.github/workflows/push.yml:
--------------------------------------------------------------------------------
1 | name: gobenchdata publish
2 | permissions:
3 | # contents permission to update benchmark contents in 'benchmarks' branch
4 | contents: write
5 | on: push
6 | jobs:
7 | publish:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: checkout
11 | uses: actions/checkout@v6
12 | - name: gobenchdata publish
13 | uses: bobheadxi/gobenchdata@v1
14 | with:
15 | PRUNE_COUNT: 30
16 | GO_TEST_FLAGS: -cpu 1,2
17 | PUBLISH: true
18 | PUBLISH_BRANCH: gh-pages
19 | env:
20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
21 |
--------------------------------------------------------------------------------
/.chglog/config.yml:
--------------------------------------------------------------------------------
1 | style: github
2 | template: CHANGELOG.tpl.md
3 | info:
4 | title: CHANGELOG
5 | repository_url: https://github.com/julien-noblet/download-geofabrik
6 | options:
7 | commits:
8 | # filters:
9 | # Type:
10 | # - feat
11 | # - fix
12 | # - perf
13 | # - refactor
14 | commit_groups:
15 | # title_maps:
16 | # feat: Features
17 | # fix: Bug Fixes
18 | # perf: Performance Improvements
19 | # refactor: Code Refactoring
20 | header:
21 | pattern: "^(\\w*)(?:\\(([\\w\\$\\.\\-\\*\\s]*)\\))?\\:\\s(.*)$"
22 | pattern_maps:
23 | - Type
24 | - Scope
25 | - Subject
26 | notes:
27 | keywords:
28 | - BREAKING CHANGE
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "gomod" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 | allow:
13 | # Allow both direct and indirect updates for all packages
14 | - dependency-type: "all"
15 | # Add assignees
16 | assignees:
17 | - "julien-noblet"
18 |
--------------------------------------------------------------------------------
/cmd/download-geofabrik/main_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os/exec"
5 | "path/filepath"
6 | "testing"
7 |
8 | "github.com/stretchr/testify/assert"
9 | "github.com/stretchr/testify/require"
10 | )
11 |
12 | func TestMainBuild(t *testing.T) {
13 | // Verify that the command builds successfully
14 | tmpDir := t.TempDir()
15 |
16 | binPath := filepath.Join(tmpDir, "download-geofabrik")
17 |
18 | cmd := exec.CommandContext(t.Context(), "go", "build", "-o", binPath, ".")
19 | output, err := cmd.CombinedOutput()
20 | require.NoError(t, err, "Build failed: %s", output)
21 |
22 | // Verify basic run (help)
23 | cmd = exec.CommandContext(t.Context(), binPath, "--help")
24 | output, err = cmd.CombinedOutput()
25 | require.NoError(t, err)
26 | assert.Contains(t, string(output), "download-geofabrik")
27 | }
28 |
--------------------------------------------------------------------------------
/.pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | gofmt -s -w *.go
4 | go mod tidy
5 | cat .README.md1 > README.md
6 | go run download-geofabrik.go --help-long >> README.md
7 | cat .README.md2 >> README.md
8 | go run download-geofabrik.go list --markdown >> README.md
9 | echo "" >> README.md
10 | echo "## List of elements from openstreetmap.fr" >> README.md
11 | go run download-geofabrik.go --service "openstreetmap.fr" list --markdown >> README.md
12 | echo "" >> README.md
13 | echo "## List of elements from bbbike.org" >> README.md
14 | go run download-geofabrik.go --service "bbbike" list --markdown >> README.md
15 | echo "" >> README.md
16 | echo "## List of elements from geo2day" >> README.md
17 | go run download-geofabrik.go --service "geo2day" list --markdown >> README.md
18 | echo "" >> README.md
19 | git add geofabrik.yml
20 | git add openstreetmap.fr.yml
21 | git add bbbike.yml
22 | git add README.md
23 |
--------------------------------------------------------------------------------
/.github/workflows/automerge-dependabot.yml:
--------------------------------------------------------------------------------
1 | name: Dependabot auto-merge
2 | on: pull_request
3 |
4 | permissions:
5 | contents: write
6 | pull-requests: write
7 |
8 | jobs:
9 | dependabot:
10 | runs-on: ubuntu-latest
11 | if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'julien-noblet/download-geofabrik'
12 | steps:
13 | - name: Dependabot metadata
14 | id: metadata
15 | uses: dependabot/fetch-metadata@v2
16 | with:
17 | github-token: "${{ secrets.GITHUB_TOKEN }}"
18 | - name: Enable auto-merge for Dependabot PRs
19 | if: contains(steps.metadata.outputs.dependency-names, 'my-dependency') && steps.metadata.outputs.update-type == 'version-update:semver-patch'
20 | run: gh pr merge --auto --merge "$PR_URL"
21 | env:
22 | PR_URL: ${{github.event.pull_request.html_url}}
23 | GH_TOKEN: ${{secrets.GITHUB_TOKEN}}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
4 | *.o
5 | *.a
6 | *.so
7 |
8 | # Makefile build folder
9 | # download-geofabrik*/
10 |
11 | # Linux binary
12 | //download-geofabrik
13 |
14 | # Folders
15 | dist
16 | _obj
17 | _test
18 | darwin32
19 | darwin64
20 | linux32
21 | linux64
22 | win32
23 | win64
24 |
25 | # Architecture specific extensions/prefixes
26 | *.[568vq]
27 | [568vq].out
28 |
29 | *.cgo1.go
30 | *.cgo2.c
31 | _cgo_defun.c
32 | _cgo_gotypes.go
33 | _cgo_export.*
34 |
35 | _testmain.go
36 |
37 | # Binaries for programs and plugins
38 | *.dll
39 | *.dylib
40 | *.exe
41 | *.exe~
42 | *.prof
43 |
44 | # Downloaded files
45 | *.poly
46 | *.osm.*
47 | *.zip
48 | *.md5
49 | *.pbf
50 |
51 | # need for tests
52 | !LICENSE.md5
53 |
54 | snap.login
55 |
56 | # Test binary, built with `go test -c`
57 | *.test
58 |
59 | # Output of the go coverage tool, specifically when used with LiteIDE
60 | *.out
61 |
62 |
63 | dist/
64 |
65 |
66 | /download-geofabrik
67 |
--------------------------------------------------------------------------------
/internal/cli/generate.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 | "log/slog"
6 |
7 | "github.com/julien-noblet/download-geofabrik/internal/config"
8 | "github.com/julien-noblet/download-geofabrik/internal/generator"
9 | "github.com/spf13/cobra"
10 | "github.com/spf13/viper"
11 | )
12 |
13 | var generateProgress bool
14 |
15 | var generateCmd = &cobra.Command{
16 | Use: "generate",
17 | Short: "Generate configuration file",
18 | RunE: runGenerate,
19 | }
20 |
21 | func RegisterGenerateCmd() {
22 | rootCmd.AddCommand(generateCmd)
23 | generateCmd.Flags().BoolVarP(&generateProgress, "progress", "p", true, "Show progress bar")
24 | }
25 |
26 | func runGenerate(_ *cobra.Command, _ []string) error {
27 | cfgFile := viper.ConfigFileUsed()
28 | if cfgFile == "" {
29 | if service != "" {
30 | cfgFile = service + ".yml"
31 | } else {
32 | cfgFile = config.DefaultConfigFile
33 | }
34 | }
35 |
36 | slog.Info("Generating config", "service", service, "file", cfgFile)
37 |
38 | if err := generator.Generate(service, generateProgress, cfgFile); err != nil {
39 | slog.Error("Generation failed", "error", err)
40 |
41 | return fmt.Errorf("generation failed: %w", err)
42 | }
43 |
44 | return nil
45 | }
46 |
--------------------------------------------------------------------------------
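Illustrative sketch (not a file in the repository): assuming generator.Generate has the signature implied by the call in runGenerate above (service string, progress bool, config file string), the same generation step could be driven programmatically. config.DefaultService and config.DefaultConfigFile are the constants referenced in internal/cli; note that this performs real network scraping.

```go
package main

import (
	"log"

	"github.com/julien-noblet/download-geofabrik/internal/config"
	"github.com/julien-noblet/download-geofabrik/internal/generator"
)

func main() {
	// Scrape the default service with a progress bar and write the resulting
	// element list to the default config file (performs real HTTP requests).
	if err := generator.Generate(config.DefaultService, true, config.DefaultConfigFile); err != nil {
		log.Fatal(err)
	}
}
```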
/.github/workflows/goreleaser.yml:
--------------------------------------------------------------------------------
1 | # .github/workflows/release.yml
2 | name: goreleaser
3 |
4 | on:
5 | #pull_request: # do not run on PRs
6 | push:
7 | # run only against tags
8 | tags:
9 | - "*"
10 |
11 | permissions:
12 | contents: write
13 | # packages: write
14 | # issues: write
15 |
16 | jobs:
17 | goreleaser:
18 | runs-on: ubuntu-latest
19 | steps:
20 | - name: Checkout
21 | uses: actions/checkout@v6
22 | with:
23 | fetch-depth: 0
24 | - name: Set up Go
25 | uses: actions/setup-go@v6
26 | with:
27 | go-version: stable
28 | check-latest: true
29 | cache: true
30 | # More assembly might be required: Docker logins, GPG, etc.
31 | # It all depends on your needs.
32 | - name: Run GoReleaser
33 | uses: goreleaser/goreleaser-action@v6
34 | with:
35 | # either 'goreleaser' (default) or 'goreleaser-pro'
36 | distribution: goreleaser
37 | # 'latest', 'nightly', or a semver
38 | version: "latest"
39 | args: release --clean --timeout 120m
40 | env:
41 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
42 | # Your GoReleaser Pro key, if you are using the 'goreleaser-pro' distribution
43 | # GORELEASER_KEY: ${{ secrets.GORELEASER_KEY }}
--------------------------------------------------------------------------------
/internal/element/element.go:
--------------------------------------------------------------------------------
1 | package element
2 |
3 | // Element represents a part to download with formats, name, parent, etc.
4 | type Element struct {
5 | ID string `yaml:"id"`
6 | File string `yaml:"file,omitempty"`
7 | Name string `yaml:"name,omitempty"`
8 | Parent string `yaml:"parent,omitempty"`
9 | Formats Formats `yaml:"files,omitempty"`
10 | Meta bool `yaml:"meta,omitempty"`
11 | }
12 |
13 | type Formats []string
14 |
15 | // MapElement contains all Elements.
16 | type MapElement map[string]Element
17 |
18 | // HasParent checks if the element has a parent.
19 | func (e *Element) HasParent() bool {
20 | return e.Parent != ""
21 | }
22 |
23 | // Contains checks if the format list contains a specific format.
24 | func (f *Formats) Contains(format string) bool {
25 | for _, existingFormat := range *f {
26 | if format == existingFormat {
27 | return true
28 | }
29 | }
30 |
31 | return false
32 | }
33 |
34 | // CreateParentElement creates a parent element for the given element.
35 | // Useful for meta parents.
36 | func CreateParentElement(e *Element, grandparentID string) *Element {
37 | if e.HasParent() {
38 | return &Element{
39 | ID: e.Parent,
40 | File: "",
41 | Name: e.Parent,
42 | Parent: grandparentID,
43 | Formats: Formats{},
44 | Meta: true,
45 | }
46 | }
47 |
48 | return nil
49 | }
50 |
--------------------------------------------------------------------------------
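Illustrative sketch (not a file in the repository) of how the element API above could be used; the region and format values are hypothetical examples.

```go
package main

import (
	"fmt"

	"github.com/julien-noblet/download-geofabrik/internal/element"
)

func main() {
	france := element.Element{
		ID:      "france",
		Name:    "France",
		Parent:  "europe",
		Formats: element.Formats{"osm.pbf", "osm.bz2"},
	}

	// Contains reports whether a given format is listed for the element.
	fmt.Println(france.Formats.Contains("osm.pbf")) // true

	// CreateParentElement builds a meta parent ("europe") with no files of its own.
	if parent := element.CreateParentElement(&france, ""); parent != nil {
		fmt.Println(parent.ID, parent.Meta) // europe true
	}
}
```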
/.README.md1:
--------------------------------------------------------------------------------
1 | # download-geofabrik
2 |
3 | [](https://gitter.im/julien-noblet/download-geofabrik?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
4 | 
5 | [](https://app.fossa.io/projects/git%2Bgithub.com%2Fjulien-noblet%2Fdownload-geofabrik?ref=badge_shield)
6 |
7 | ## Version 2
8 | Warning! The command line has changed since V1;
9 | see [Usage](#usage)
10 |
11 | ## Docker
12 | ```shell
13 | docker run -it --rm -v $PWD:/data download-geofabrik:latest download element
14 | ```
15 | where ```element``` is one of geofabrik's files.
16 | ## License
17 | [](https://app.fossa.io/projects/git%2Bgithub.com%2Fjulien-noblet%2Fdownload-geofabrik?ref=badge_large)
18 |
19 | ## Usage
20 | ```shell
21 | ./download-geofabrik download element
22 | ```
23 | where ```element``` is one of geofabrik's files.
24 | ```shell
25 | ./download-geofabrik --help-long
26 |
27 |
--------------------------------------------------------------------------------
/internal/cli/list.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 | "log/slog"
6 |
7 | "github.com/julien-noblet/download-geofabrik/internal/config"
8 | "github.com/julien-noblet/download-geofabrik/internal/lists"
9 | "github.com/spf13/cobra"
10 | "github.com/spf13/viper"
11 | )
12 |
13 | var markdown bool
14 |
15 | var listCmd = &cobra.Command{
16 | Use: "list",
17 | Short: "Show elements available",
18 | RunE: runList,
19 | }
20 |
21 | func RegisterListCmd() {
22 | rootCmd.AddCommand(listCmd)
23 | listCmd.Flags().BoolVar(&markdown, "markdown", false, "Generate list in Markdown format")
24 | }
25 |
26 | func runList(_ *cobra.Command, _ []string) error {
27 | cfgFile := viper.ConfigFileUsed()
28 | if cfgFile == "" {
29 | if service != "" {
30 | cfgFile = service + ".yml"
31 | } else {
32 | cfgFile = config.DefaultConfigFile
33 | }
34 | }
35 |
36 | opts := &config.Options{
37 | ConfigFile: cfgFile,
38 | }
39 |
40 | cfg, err := config.LoadConfig(opts.ConfigFile)
41 | if err != nil {
42 | slog.Error("Failed to load config", "file", opts.ConfigFile, "error", err)
43 |
44 | return fmt.Errorf("failed to load config: %w", err)
45 | }
46 |
47 | format := ""
48 | if markdown {
49 | format = lists.MarkdownFormat
50 | }
51 |
52 | if err := lists.ListAllRegions(cfg, format); err != nil {
53 | slog.Error("Failed to list all regions", "error", err)
54 |
55 | return fmt.Errorf("failed to list regions: %w", err)
56 | }
57 |
58 | return nil
59 | }
60 |
--------------------------------------------------------------------------------
/.chglog/CHANGELOG.tpl.md:
--------------------------------------------------------------------------------
1 | {{ if .Versions -}}
2 |
3 | ## [Unreleased]
4 |
5 | {{ if .Unreleased.CommitGroups -}}
6 | {{ range .Unreleased.CommitGroups -}}
7 | ### {{ .Title }}
8 | {{ range .Commits -}}
9 | - {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }}
10 | {{ end }}
11 | {{ end -}}
12 | {{ end -}}
13 | {{ end -}}
14 |
15 | {{ range .Versions }}
16 |
17 | ## {{ if .Tag.Previous }}[{{ .Tag.Name }}]{{ else }}{{ .Tag.Name }}{{ end }} - {{ datetime "2006-01-02" .Tag.Date }}
18 | {{ range .CommitGroups -}}
19 | ### {{ .Title }}
20 | {{ range .Commits -}}
21 | - {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }}
22 | {{ end }}
23 | {{ end -}}
24 |
25 | {{- if .RevertCommits -}}
26 | ### Reverts
27 | {{ range .RevertCommits -}}
28 | - {{ .Revert.Header }}
29 | {{ end }}
30 | {{ end -}}
31 |
32 | {{- if .MergeCommits -}}
33 | ### Pull Requests
34 | {{ range .MergeCommits -}}
35 | - {{ .Header }}
36 | {{ end }}
37 | {{ end -}}
38 |
39 | {{- if .NoteGroups -}}
40 | {{ range .NoteGroups -}}
41 | ### {{ .Title }}
42 | {{ range .Notes }}
43 | {{ .Body }}
44 | {{ end }}
45 | {{ end -}}
46 | {{ end -}}
47 | {{ end -}}
48 |
49 | {{- if .Versions }}
50 | [Unreleased]: {{ .Info.RepositoryURL }}/compare/{{ $latest := index .Versions 0 }}{{ $latest.Tag.Name }}...HEAD
51 | {{ range .Versions -}}
52 | {{ if .Tag.Previous -}}
53 | [{{ .Tag.Name }}]: {{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ .Tag.Name }}
54 | {{ end -}}
55 | {{ end -}}
56 | {{ end -}}
--------------------------------------------------------------------------------
/internal/generator/importer/geofabrik/geofabrik_test.go:
--------------------------------------------------------------------------------
1 | package geofabrik_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/julien-noblet/download-geofabrik/internal/generator/importer/geofabrik"
7 | "github.com/spf13/viper"
8 | "github.com/stretchr/testify/assert"
9 | "github.com/stretchr/testify/require"
10 | )
11 |
12 | func TestGetIndex(t *testing.T) {
13 | t.Parallel()
14 | viper.Set("log", true)
15 |
16 | tests := []struct {
17 | name string
18 | myURL string
19 | wantErr bool
20 | }{
21 | // TODO: Add test cases.
22 | {
23 | name: "Test",
24 | myURL: geofabrik.GeofabrikIndexURL,
25 | wantErr: false,
26 | },
27 | {
28 | name: "Test 404",
29 | myURL: "https://google.com/404",
30 | wantErr: true,
31 | },
32 | }
33 | for _, thisTest := range tests {
34 | t.Run(thisTest.name, func(t *testing.T) {
35 | t.Parallel()
36 |
37 | index, err := geofabrik.GetIndex(thisTest.myURL)
38 | if thisTest.wantErr {
39 | require.Error(t, err)
40 | } else {
41 | assert.NotNil(t, index)
42 |
43 | if len(index.Features) < 10 {
44 | t.Errorf("GetIndex() error I should have more features!!!")
45 | }
46 |
47 | converted, err := geofabrik.Convert(index)
48 | if converted == nil || err != nil {
49 | t.Errorf("GetIndex() error cant convert !!!\n%v", err)
50 | }
51 |
52 | if e, err := converted.GetElement("france"); err != nil || e == nil {
53 | t.Errorf("GetIndex() error cant find element !!!\nconfig=%v\nerr=%v", converted, err)
54 | }
55 | }
56 | })
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/internal/cli/list_test.go:
--------------------------------------------------------------------------------
1 | package cli_test
2 |
3 | import (
4 | "os"
5 | "testing"
6 |
7 | "github.com/julien-noblet/download-geofabrik/internal/cli"
8 | "github.com/spf13/viper"
9 | "github.com/stretchr/testify/assert"
10 | "github.com/stretchr/testify/require"
11 | )
12 |
13 | func TestListCmd(t *testing.T) {
14 | // Setup mock config
15 | tmpDir := t.TempDir()
16 |
17 | configFile := tmpDir + "/geofabrik.yml"
18 |
19 | err := os.WriteFile(configFile, []byte(testConfigContent), 0o600)
20 | require.NoError(t, err)
21 |
22 | // Save original args
23 | oldArgs := os.Args
24 |
25 | defer func() { os.Args = oldArgs }()
26 |
27 | // Reset global state
28 | cli.ResetGlobs()
29 | viper.Reset()
30 | // viper.Reset drops the flag bindings made in initCLI (which runs only once
31 | // via sync.Once), but runList only needs viper.ConfigFileUsed(), and passing
32 | // --config below lets initConfig point viper at the temporary file.
33 |
34 | // Test regular list
35 | cli.RootCmd.SetArgs([]string{"list", "--config", configFile})
36 |
37 | err = cli.Execute()
38 | require.NoError(t, err)
39 |
40 | // Test markdown list
41 | cli.RootCmd.SetArgs([]string{"list", "--config", configFile, "--markdown"})
42 |
43 | err = cli.Execute()
44 | assert.NoError(t, err)
45 | }
46 |
--------------------------------------------------------------------------------
/.github/workflows/golangci-lint.yml:
--------------------------------------------------------------------------------
1 | name: golangci-lint
2 | on:
3 | push:
4 | tags:
5 | - v*
6 | branches:
7 | - master
8 | - main
9 | pull_request:
10 | permissions:
11 | contents: read
12 | # Optional: allow read access to pull request. Use with `only-new-issues` option.
13 | pull-requests: read
14 | jobs:
15 | golangci:
16 | name: lint
17 | runs-on: ubuntu-latest
18 | steps:
19 | - uses: actions/checkout@v6
20 | - uses: actions/setup-go@v6
21 | with:
22 | go-version: stable
23 | check-latest: true
24 | cache: true
25 | go-version-file: "go.mod"
26 | - name: golangci-lint
27 | uses: golangci/golangci-lint-action@v9
28 | with:
29 | # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version
30 | version: latest
31 |
32 | # Optional: working directory, useful for monorepos
33 | # working-directory: somedir
34 |
35 | # Optional: golangci-lint command line arguments.
36 | # args: --issues-exit-code=0
37 |
38 | # Optional: show only new issues if it's a pull request. The default value is `false`.
39 | only-new-issues: true
40 |
41 | # Optional: if set to true, all caching functionality will be completely disabled;
42 | # this takes precedence over all other caching options.
43 | skip-cache: true
44 |
45 | # Optional: if set to true then the action don't cache or restore ~/go/pkg.
46 | # skip-pkg-cache: true
47 |
48 | # Optional: if set to true then the action don't cache or restore ~/.cache/go-build.
49 | # skip-build-cache: true
50 |
--------------------------------------------------------------------------------
/internal/lists/lists_test.go:
--------------------------------------------------------------------------------
1 | package lists_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/julien-noblet/download-geofabrik/internal/config"
7 | "github.com/julien-noblet/download-geofabrik/internal/element"
8 | "github.com/julien-noblet/download-geofabrik/internal/lists"
9 | "github.com/stretchr/testify/assert"
10 | )
11 |
12 | func TestListAllRegions(t *testing.T) { // it just works
13 | t.Parallel()
14 |
15 | mockConfig := &config.Config{
16 | Elements: map[string]element.Element{
17 | "region1": {Parent: "parent1", Name: "Region 1", Formats: []string{"format1"}},
18 | "region2": {Parent: "parent2", Name: "Region 2", Formats: []string{"format2"}},
19 | },
20 | }
21 |
22 | tests := []struct {
23 | name string
24 | format string
25 | }{
26 | {name: "Default format", format: ""},
27 | {name: "Markdown format", format: "Markdown"},
28 | }
29 |
30 | for _, tt := range tests {
31 | t.Run(tt.name, func(t *testing.T) {
32 | t.Parallel()
33 |
34 | err := lists.ListAllRegions(mockConfig, tt.format)
35 | // Output: | ShortName | Is in | Long Name | formats |
36 | // |-----------|-------|----------|--------|
37 | // | region1 | parent1 | Region 1 | format1 |
38 | // | region2 | parent2 | Region 2 | format2 |
39 | // Total elements: 2
40 |
41 | assert.NoError(t, err)
42 | })
43 | }
44 | }
45 |
46 | func TestGetSortedKeys(t *testing.T) {
47 | t.Parallel()
48 |
49 | mockConfig := &config.Config{
50 | Elements: map[string]element.Element{
51 | "region2": {Parent: "parent2", Name: "Region 2", Formats: []string{"format2"}},
52 | "region1": {Parent: "parent1", Name: "Region 1", Formats: []string{"format1"}},
53 | },
54 | }
55 |
56 | keys := lists.GetSortedKeys(mockConfig)
57 | assert.Equal(t, []string{"region1", "region2"}, keys)
58 | }
59 |
--------------------------------------------------------------------------------
/.github/workflows/genchangelog.yml:
--------------------------------------------------------------------------------
1 | name: Build changelog
2 | on:
3 | repository_dispatch:
4 | types: [ trigger-changelog-workflow ]
5 | workflow_dispatch:
6 | inputs:
7 | next_version:
8 | description: "Next version tag"
9 | required: false
10 | commit_message:
11 | description: "Commit message"
12 | required: false
13 |
14 | jobs:
15 | package:
16 | runs-on: ubuntu-latest
17 | steps:
18 | - name: Checkout repo
19 | uses: actions/checkout@v6
20 | with:
21 | fetch-depth: 0
22 | submodules: recursive
23 | - uses: maicol07/github-changelog-action@patch-1
24 | with:
25 | next_version: ${{ github.event.inputs.next_version }}
26 | - uses: oleksiyrudenko/gha-git-credentials@v2-latest
27 | with:
28 | token: '${{ secrets.GITHUB_TOKEN }}'
29 | - run: "git commit -m \"changelog: 🔖 Updated changelog for commit ${{ github.sha }}\" -a"
30 | if: ${{ !github.event.inputs.commit_message && !github.event.inputs.next_version }}
31 | - run: "git commit -m \"release: 🔖 ${{ github.event.inputs.next_version }}\" -a"
32 | if: ${{ !github.event.inputs.commit_message && github.event.inputs.next_version }}
33 | - run: "git commit -m \"${{ github.event.inputs.commit_message }}\" -a"
34 | if: ${{ github.event.inputs.commit_message }}
35 | - name: Push changes
36 | uses: ad-m/github-push-action@master
37 | with:
38 | github_token: ${{ secrets.GITHUB_TOKEN }}
39 | branch: ${{ github.ref }}
--------------------------------------------------------------------------------
/.github/workflows/genyml.yml:
--------------------------------------------------------------------------------
1 | name: yml-generate
2 | on:
3 | # pull_request:
4 | push:
5 | branches:
6 | - master
7 | schedule:
8 | - cron: '0 0 * * 0' # every Sunday at midnight
9 | permissions:
10 | # contents permission to update benchmark contents in 'benchmarks' branch
11 | contents: write
12 | jobs:
13 | yml-generate:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v6
17 | with:
18 | ref: ${{ github.head_ref }}
19 | - uses: actions/setup-go@v6
20 | with:
21 | go-version: stable
22 | check-latest: true
23 | cache: true
24 | - run: go run cmd/download-geofabrik/main.go generate
25 | - run: go run cmd/download-geofabrik/main.go --service="openstreetmap.fr" generate
26 | - run: go run cmd/download-geofabrik/main.go --service="bbbike" generate
27 | - run: go run cmd/download-geofabrik/main.go --service="geo2day" generate
28 | - run: cat .README.md1 > README.md
29 | - run: go run cmd/download-geofabrik/main.go --help >> README.md
30 | - run: cat .README.md2 >> README.md
31 | - run: go run cmd/download-geofabrik/main.go list --markdown >> README.md
32 | - run: echo "" >> README.md
33 | - run: echo "## List of elements from openstreetmap.fr" >> README.md
34 | - run: go run cmd/download-geofabrik/main.go --service "openstreetmap.fr" list --markdown >> README.md
35 | - run: echo "" >> README.md
36 | - run: echo "## List of elements from bbbike.org" >> README.md
37 | - run: go run cmd/download-geofabrik/main.go --service "bbbike" list --markdown >> README.md
38 | - run: echo "" >> README.md
39 | - run: echo "## List of elements from geo2day" >> README.md
40 | - run: go run cmd/download-geofabrik/main.go --service "geo2day" list --markdown >> README.md
41 | - run: echo "" >> README.md
42 | - uses: actions-js/push@master
43 | with:
44 | message: generate yaml files
45 | branch: new_yamls
46 | github_token: ${{ secrets.GITHUB_TOKEN }}
47 | rebase: true
48 | force: true
49 |
--------------------------------------------------------------------------------
/internal/lists/lists.go:
--------------------------------------------------------------------------------
1 | package lists
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "sort"
7 |
8 | "github.com/julien-noblet/download-geofabrik/internal/config"
9 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
10 | "github.com/olekukonko/tablewriter"
11 | "github.com/olekukonko/tablewriter/tw"
12 | )
13 |
14 | const (
15 | MarkdownFormat = "Markdown"
16 | )
17 |
18 | // ListAllRegions lists all regions in the specified format.
19 | func ListAllRegions(configuration *config.Config, format string) error {
20 | table := CreateTable(format)
21 | keys := GetSortedKeys(configuration)
22 |
23 | for _, item := range keys {
24 | err := table.Append(
25 | item,
26 | configuration.Elements[configuration.Elements[item].Parent].Name,
27 | configuration.Elements[item].Name,
28 | formats.GetMiniFormats(configuration.Elements[item].Formats),
29 | )
30 | if err != nil {
31 | return fmt.Errorf("unable to append: %w", err)
32 | }
33 | }
34 |
35 | if err := table.Render(); err != nil {
36 | return fmt.Errorf("unable to render table: %w", err)
37 | }
38 |
39 | fmt.Printf("Total elements: %#v\n", len(configuration.Elements)) //nolint:forbidigo // I want to print the number of elements
40 |
41 | return nil
42 | }
43 |
44 | // CreateTable creates a table with the specified format.
45 | func CreateTable(format string) *tablewriter.Table {
46 | // Options
47 | opts := []tablewriter.Option{
48 | tablewriter.WithHeader([]string{"ShortName", "Is in", "Long Name", "formats"}),
49 | tablewriter.WithAlignment(tw.MakeAlign(4, tw.AlignLeft)), //nolint:mnd // 4 columns to align left
50 | }
51 |
52 | if format == MarkdownFormat {
53 | opts = append(opts, tablewriter.WithRendition(tw.Rendition{
54 | Symbols: tw.NewSymbols(tw.StyleMarkdown),
55 | Borders: tw.Border{
56 | Left: tw.On,
57 | Top: tw.Off,
58 | Right: tw.On,
59 | Bottom: tw.Off,
60 | },
61 | }))
62 | }
63 |
64 | return tablewriter.NewTable(os.Stdout, opts...)
65 | }
66 |
67 | // GetSortedKeys returns the sorted keys of the configuration elements.
68 | func GetSortedKeys(configuration *config.Config) []string {
69 | keys := make(sort.StringSlice, len(configuration.Elements))
70 | i := 0
71 |
72 | for k := range configuration.Elements {
73 | keys[i] = k
74 | i++
75 | }
76 |
77 | keys.Sort()
78 |
79 | return keys
80 | }
81 |
--------------------------------------------------------------------------------
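Illustrative sketch (not a file in the repository): assuming config.Config exposes the Elements map the way lists_test.go uses it, ListAllRegions can render a Markdown table directly from an in-memory configuration.

```go
package main

import (
	"log"

	"github.com/julien-noblet/download-geofabrik/internal/config"
	"github.com/julien-noblet/download-geofabrik/internal/element"
	"github.com/julien-noblet/download-geofabrik/internal/lists"
)

func main() {
	cfg := &config.Config{
		Elements: map[string]element.Element{
			"region1": {Parent: "parent1", Name: "Region 1", Formats: []string{"osm.pbf"}},
		},
	}

	// Render the element table to stdout using the Markdown rendition.
	if err := lists.ListAllRegions(cfg, lists.MarkdownFormat); err != nil {
		log.Fatal(err)
	}
}
```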
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/julien-noblet/download-geofabrik
2 |
3 | require (
4 | github.com/PuerkitoBio/goquery v1.11.0
5 | github.com/cheggaaa/pb/v3 v3.1.7
6 | github.com/gocolly/colly/v2 v2.3.0
7 | github.com/olekukonko/tablewriter v1.1.2
8 | github.com/spf13/cobra v1.10.2
9 | github.com/spf13/viper v1.21.0
10 | github.com/stretchr/testify v1.11.1
11 | gopkg.in/yaml.v3 v3.0.1
12 | )
13 |
14 | require (
15 | github.com/VividCortex/ewma v1.2.0 // indirect
16 | github.com/andybalholm/cascadia v1.3.3 // indirect
17 | github.com/antchfx/htmlquery v1.3.5 // indirect
18 | github.com/antchfx/xmlquery v1.5.0 // indirect
19 | github.com/antchfx/xpath v1.3.5 // indirect
20 | github.com/bits-and-blooms/bitset v1.24.4 // indirect
21 | github.com/clipperhouse/displaywidth v0.6.1 // indirect
22 | github.com/clipperhouse/stringish v0.1.1 // indirect
23 | github.com/clipperhouse/uax29/v2 v2.3.0 // indirect
24 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
25 | github.com/fatih/color v1.18.0 // indirect
26 | github.com/fsnotify/fsnotify v1.9.0 // indirect
27 | github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
28 | github.com/gobwas/glob v0.2.3 // indirect
29 | github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
30 | github.com/golang/protobuf v1.5.4 // indirect
31 | github.com/inconshreveable/mousetrap v1.1.0 // indirect
32 | github.com/kennygrant/sanitize v1.2.4 // indirect
33 | github.com/mattn/go-colorable v0.1.14 // indirect
34 | github.com/mattn/go-isatty v0.0.20 // indirect
35 | github.com/mattn/go-runewidth v0.0.19 // indirect
36 | github.com/nlnwa/whatwg-url v0.6.2 // indirect
37 | github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
38 | github.com/olekukonko/errors v1.1.0 // indirect
39 | github.com/olekukonko/ll v0.1.3 // indirect
40 | github.com/pelletier/go-toml/v2 v2.2.4 // indirect
41 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
42 | github.com/sagikazarmark/locafero v0.12.0 // indirect
43 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
44 | github.com/spf13/afero v1.15.0 // indirect
45 | github.com/spf13/cast v1.10.0 // indirect
46 | github.com/spf13/pflag v1.0.10 // indirect
47 | github.com/subosito/gotenv v1.6.0 // indirect
48 | github.com/temoto/robotstxt v1.1.2 // indirect
49 | go.yaml.in/yaml/v3 v3.0.4 // indirect
50 | golang.org/x/net v0.47.0 // indirect
51 | golang.org/x/sys v0.38.0 // indirect
52 | golang.org/x/text v0.31.0 // indirect
53 | google.golang.org/appengine v1.6.8 // indirect
54 | google.golang.org/protobuf v1.36.10 // indirect
55 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
56 | )
57 |
58 | go 1.24.0
59 |
60 | toolchain go1.25.5
61 |
--------------------------------------------------------------------------------
/internal/cli/root.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 | "log/slog"
6 | "os"
7 | "sync"
8 |
9 | "github.com/julien-noblet/download-geofabrik/internal/config"
10 | "github.com/spf13/cobra"
11 | "github.com/spf13/viper"
12 | )
13 |
14 | var (
15 | cfgFile string
16 | service string
17 | )
18 |
19 | var rootCmd = &cobra.Command{
20 | Use: "download-geofabrik",
21 | Short: "A command-line tool for downloading OSM files",
22 | Long: `download-geofabrik is a CLI tool that downloads OpenStreetMap data from Geofabrik.`,
23 | RunE: func(cmd *cobra.Command, _ []string) error {
24 | return cmd.Help()
25 | },
26 | }
27 |
28 | var once sync.Once
29 |
30 | // Execute adds all child commands to the root command and sets flags appropriately.
31 | func Execute() error {
32 | once.Do(func() {
33 | initCLI()
34 |
35 | RegisterDownloadCmd()
36 | RegisterGenerateCmd()
37 | RegisterListCmd()
38 | })
39 |
40 | if err := rootCmd.Execute(); err != nil {
41 | return fmt.Errorf("root cmd execution failed: %w", err)
42 | }
43 |
44 | return nil
45 | }
46 |
47 | func initCLI() {
48 | cobra.OnInitialize(initConfig)
49 |
50 | rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is geofabrik.yml)")
51 | rootCmd.PersistentFlags().StringVarP(&service, "service", "s", config.DefaultService,
52 | "Service to use (geofabrik, geofabrik-parse, openstreetmap.fr, geo2day, bbbike)")
53 | rootCmd.PersistentFlags().Bool("verbose", false, "Verbose mode")
54 | rootCmd.PersistentFlags().Bool("quiet", false, "Quiet mode")
55 |
56 | // Bind flags to viper
57 | if err := viper.BindPFlag("config", rootCmd.PersistentFlags().Lookup("config")); err != nil {
58 | fmt.Fprintf(os.Stderr, "Error binding config flag: %v\n", err)
59 | }
60 |
61 | if err := viper.BindPFlag("service", rootCmd.PersistentFlags().Lookup("service")); err != nil {
62 | fmt.Fprintf(os.Stderr, "Error binding service flag: %v\n", err)
63 | }
64 |
65 | if err := viper.BindPFlag("verbose", rootCmd.PersistentFlags().Lookup("verbose")); err != nil {
66 | fmt.Fprintf(os.Stderr, "Error binding verbose flag: %v\n", err)
67 | }
68 |
69 | if err := viper.BindPFlag("quiet", rootCmd.PersistentFlags().Lookup("quiet")); err != nil {
70 | fmt.Fprintf(os.Stderr, "Error binding quiet flag: %v\n", err)
71 | }
72 | }
73 |
74 | func initConfig() {
75 | if cfgFile != "" {
76 | // Use config file from the flag.
77 | viper.SetConfigFile(cfgFile)
78 | } else {
79 | // Search the current directory for a config file named after the service.
80 | viper.AddConfigPath(".")
81 | viper.SetConfigType("yaml")
82 |
83 | if service != "" {
84 | viper.SetConfigName(service)
85 | } else {
86 | viper.SetConfigName(config.DefaultConfigFile)
87 | }
88 | }
89 |
90 | viper.AutomaticEnv()
91 |
92 | if err := viper.ReadInConfig(); err == nil {
93 | slog.Info("Using config file", "file", viper.ConfigFileUsed())
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/internal/cli/root_test.go:
--------------------------------------------------------------------------------
1 | package cli_test
2 |
3 | import (
4 | "os"
5 | "testing"
6 |
7 | "github.com/julien-noblet/download-geofabrik/internal/cli"
8 | "github.com/spf13/viper"
9 | "github.com/stretchr/testify/assert"
10 | "github.com/stretchr/testify/require"
11 | )
12 |
13 | func TestExecuteHelp(t *testing.T) {
14 | // Execute() does not take arguments: cobra falls back to os.Args[1:], which
15 | // under "go test" contains the test binary's own flags and would trigger
16 | // unknown-flag errors. Instead, set args on the exported RootCmd, verify its
17 | // basic configuration, and check that Execute() returns nil when it only has
18 | // to print help (RunE calls cmd.Help()).
19 | assert.NotNil(t, cli.RootCmd)
20 | assert.Equal(t, "download-geofabrik", cli.RootCmd.Use)
21 | cli.RootCmd.SetArgs([]string{"--help"})
22 |
23 | err := cli.Execute()
24 | assert.NoError(t, err)
25 | }
26 |
27 | func TestDefaultConfigLoading(t *testing.T) {
28 | // Create temp dir
29 | tmpDir := t.TempDir()
30 |
31 | // Create default config file
32 | configFile := tmpDir + "/geofabrik.yml"
33 |
34 | err := os.WriteFile(configFile, []byte(testConfigContent), 0o600)
35 | require.NoError(t, err)
36 |
37 | // Change cwd
38 | t.Chdir(tmpDir)
39 |
40 | // Reset globs
41 | cli.ResetGlobs()
42 | viper.Reset()
43 |
44 | // Run the list command without --config; it should pick up geofabrik.yml in cwd
45 | cli.RootCmd.SetArgs([]string{"list"})
46 |
47 | err = cli.Execute()
48 | assert.NoError(t, err)
49 | }
50 |
--------------------------------------------------------------------------------
/internal/downloader/hash.go:
--------------------------------------------------------------------------------
1 | package download
2 |
3 | import (
4 | "crypto/md5" //nolint:gosec // MD5 is used to control with md5sum files
5 | "encoding/hex"
6 | "fmt"
7 | "io"
8 | "log/slog"
9 | "os"
10 | "strings"
11 | )
12 |
13 | const (
14 | readErrorMsg = "can't read %s: %w"
15 | openErrorMsg = "can't open %s: %w"
16 | copyErrorMsg = "can't copy %s: %w"
17 | closeErrorMsg = "can't close file: %w"
18 | hashFileNotFoundMsg = "Hash file %s not found"
19 | hashFileReadErrorMsg = "Can't read hash file %s"
20 | hashMismatchMsg = "Checksum MISMATCH for %s"
21 | hashMatchMsg = "Checksum OK for %s"
22 | hashingFileMsg = "Hashing %s"
23 | md5HashMsg = "MD5 : %s"
24 | checksumErrorMsg = "checksum error"
25 | hashFileErrorMsg = "can't hash file"
26 | )
27 |
28 | // CheckFileHash checks if the hash of a file matches the provided hash.
29 | func CheckFileHash(hashfile, expectedHash string) (bool, error) {
30 | if !FileExist(hashfile) {
31 | slog.Warn("Hash file not found", "file", hashfile)
32 |
33 | return false, nil
34 | }
35 |
36 | fileContent, err := os.ReadFile(hashfile)
37 | if err != nil {
38 | slog.Warn("Can't read hash file", "file", hashfile, "error", err)
39 |
40 | return false, fmt.Errorf(readErrorMsg, hashfile, err)
41 | }
42 |
43 | fileHash := strings.Split(string(fileContent), " ")[0]
44 | slog.Info("Hash from file", "hash", fileHash)
45 |
46 | return strings.EqualFold(expectedHash, fileHash), nil
47 | }
48 |
49 | // ComputeMD5Hash computes the MD5 hash of a file.
50 | func ComputeMD5Hash(filePath string) (string, error) {
51 | if !FileExist(filePath) {
52 | return "", nil
53 | }
54 |
55 | file, err := os.Open(filePath)
56 | if err != nil {
57 | return "", fmt.Errorf(openErrorMsg, filePath, err)
58 | }
59 |
60 | defer func() {
61 | if err := file.Close(); err != nil {
62 | slog.Error("Can't close file", "error", err)
63 | }
64 | }()
65 |
66 | hash := md5.New() //nolint:gosec // MD5 is used to control with md5sum files
67 | if _, err := io.Copy(hash, file); err != nil {
68 | return "", fmt.Errorf(copyErrorMsg, filePath, err)
69 | }
70 |
71 | return hex.EncodeToString(hash.Sum(nil)), nil
72 | }
73 |
74 | // VerifyFileChecksum verifies the checksum of a file.
75 | func VerifyFileChecksum(file, hashfile string) bool {
76 | slog.Info("Hashing file", "file", file)
77 |
78 | hashed, err := ComputeMD5Hash(file)
79 | if err != nil {
80 | slog.Error("Can't hash file", "error", err)
81 |
82 | return false // Was Fatal before
83 | }
84 |
85 | slog.Debug("MD5 Hash", "hash", hashed)
86 |
87 | ret, err := CheckFileHash(hashfile, hashed)
88 | if err != nil {
89 | slog.Error("Checksum error", "error", err)
90 | }
91 |
92 | if ret {
93 | slog.Info("Checksum OK", "file", file)
94 | } else {
95 | slog.Warn("Checksum MISMATCH", "file", file)
96 | }
97 |
98 | return ret
99 | }
100 |
--------------------------------------------------------------------------------
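Illustrative sketch (not a file in the repository) of the checksum helpers above; the file names are hypothetical and assume an md5sum-style hash file ("&lt;hash&gt; &lt;name&gt;") sitting next to the download.

```go
package main

import (
	"fmt"
	"log"

	download "github.com/julien-noblet/download-geofabrik/internal/downloader"
)

func main() {
	// ComputeMD5Hash returns "" (and no error) when the file does not exist.
	sum, err := download.ComputeMD5Hash("LICENSE")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("md5:", sum)

	// VerifyFileChecksum hashes the file, compares it against the .md5 file,
	// and logs whether the checksum matches.
	fmt.Println("checksum ok:", download.VerifyFileChecksum("LICENSE", "LICENSE.md5"))
}
```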
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 |
14 | on:
15 | push:
16 | branches: [ "master" ]
17 | pull_request:
18 | # The branches below must be a subset of the branches above
19 | branches: [ "master" ]
20 | schedule:
21 | - cron: '39 20 * * 5'
22 |
23 | jobs:
24 | analyze:
25 | name: Analyze
26 | runs-on: ubuntu-latest
27 | permissions:
28 | actions: read
29 | contents: read
30 | security-events: write
31 |
32 | strategy:
33 | fail-fast: false
34 | matrix:
35 | language: [ 'go' ]
36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
38 |
39 | steps:
40 | - name: Checkout repository
41 | uses: actions/checkout@v6
42 |
43 | # Initializes the CodeQL tools for scanning.
44 | - name: Initialize CodeQL
45 | uses: github/codeql-action/init@v4
46 | with:
47 | languages: ${{ matrix.language }}
48 | # If you wish to specify custom queries, you can do so here or in a config file.
49 | # By default, queries listed here will override any specified in a config file.
50 | # Prefix the list here with "+" to use these queries and those in the config file.
51 |
52 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
53 | # queries: security-extended,security-and-quality
54 |
55 |
56 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
57 | # If this step fails, then you should remove it and run the build manually (see below)
58 | - name: Autobuild
59 | uses: github/codeql-action/autobuild@v4
60 |
61 | # ℹ️ Command-line programs to run using the OS shell.
62 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
63 |
64 | # If the Autobuild fails above, remove it and uncomment the following three lines.
65 | # and modify them (or add more) to build your code; refer to the commented example below for guidance.
66 |
67 | # - run: |
68 | # echo "Run, Build Application using script"
69 | # ./location_of_script_within_repo/buildscript.sh
70 |
71 | - name: Perform CodeQL Analysis
72 | uses: github/codeql-action/analyze@v4
73 |
--------------------------------------------------------------------------------
/.github/workflows/coverage.yml:
--------------------------------------------------------------------------------
1 | name: "Go Coverage"
2 | permissions:
3 | # contents permission to update benchmark contents in 'benchmarks' branch
4 | contents: write
5 | on:
6 | pull_request:
7 | push:
8 | branches:
9 | # It's important that the action also runs on merge to main
10 | - main
11 | - master
12 |
13 | jobs:
14 | coverage:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/setup-go@v6
18 | with:
19 | go-version: 1.25.5
20 |
21 | - uses: actions/checkout@v6
22 | with:
23 | # default fetch-depth is insufficient to find previous coverage notes
24 | fetch-depth: 10
25 |
26 | - uses: gwatts/go-coverage-action@v2
27 | id: coverage
28 | continue-on-error: true
29 | with:
30 | # Optional coverage threshold
31 | # use fail-coverage to determine what should happen below this threshold
32 | coverage-threshold: 70
33 |
34 | # collect coverage for all packages beyond the one under test
35 | cover-pkg: ./...
36 |
37 | # Ignore code-generated files when calculating coverage totals
38 | ignore-pattern: |
39 | \.pb\.go$
40 | \_string\.go$
41 |
42 | # A url that the html report will be accessible at, once your
43 | # workflow uploads it. Used in the pull request comment.
44 | report-url: https://julien-noblet.github.io/download-geofabrik/${{ github.ref_name}}.html
45 |
46 | - name: Generate coverage tree map
47 | run: |
48 | mkdir -p ${{ github.ref_name}}
49 | mkdir -p /tmp/${{ github.ref_name}}
50 | go run github.com/nikolaydubina/go-cover-treemap@latest -coverprofile ${{ steps.coverage.outputs.gocov-agg-pathname }} -only-folders=true > /tmp/${{ github.ref_name}}.svg
51 | - name: Commit files
52 | run: |
53 | git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
54 | git config --local user.name "github-actions[bot]"
55 | git fetch --all
56 | git checkout gh-pages
57 | mkdir -p ${{ github.ref_name}}
58 | cp -r ${{ steps.coverage.outputs.report-pathname }} ${{ github.ref_name}}.html
59 | cp /tmp/${{ github.ref_name}}.svg ${{ github.ref_name}}.svg
60 | git add ${{ github.ref_name}}.html ${{ github.ref_name}}.svg
61 | git commit -m "Upload coverage report"
62 | - name: Push changes
63 | uses: ad-m/github-push-action@master
64 | with:
65 | github_token: ${{ secrets.GITHUB_TOKEN }}
66 | branch: gh-pages
67 | badge-generation:
68 | runs-on: ubuntu-latest
69 | if: github.ref == 'refs/heads/master'
70 | steps:
71 | - uses: actions/checkout@v6
72 | with:
74 | # default fetch-depth is insufficient to find previous coverage notes
74 | fetch-depth: 10
75 |
76 | - uses: actions/setup-go@v6
77 | with:
78 | go-version: 1.25.5
79 |
80 | - uses: gwatts/go-coverage-action@v2
81 | id: coverage
82 | continue-on-error: true
83 | with:
84 | # Optional coverage threshold
85 | # use fail-coverage to determine what should happen below this threshold
86 | coverage-threshold: 70
87 |
88 | # collect coverage for all packages beyond the one under test
89 | cover-pkg: ./...
90 |
91 | # Ignore code-generated files when calculating coverage totals
92 | ignore-pattern: |
93 | \.pb\.go$
94 | \_string\.go$
95 |
96 | # A url that the html report will be accessible at, once your
97 | # workflow uploads it. Used in the pull request comment.
98 | report-url: https://julien-noblet.github.io/download-geofabrik/${{ github.ref_name}}.html
99 |
100 | - name: Create Awesome Badge
101 | uses: schneegans/dynamic-badges-action@v1.7.0
102 | with:
103 | auth: ${{ secrets.GIST_SECRET }}
104 | gistID: a509e15ea4734ca3e8e98f32ab5369c0
105 | filename: coverage.json
106 | label: Go Coverage
107 | message: ${{ steps.coverage.outputs.coverage-pct-1dp }}%
108 | color: ${{ steps.coverage.outputs.meets-threshold == 'true' && 'green' || 'red' }}
109 |
--------------------------------------------------------------------------------
/internal/downloader/hash_test.go:
--------------------------------------------------------------------------------
1 | package download_test
2 |
3 | import (
4 | "os"
5 | "testing"
6 |
7 | download "github.com/julien-noblet/download-geofabrik/internal/downloader"
8 | )
9 |
10 | func Test_hashFileMD5(t *testing.T) {
11 | t.Parallel()
12 |
13 | type args struct {
14 | filePath string
15 | }
16 |
17 | tests := []struct {
18 | name string
19 | args args
20 | want string
21 | wantErr bool
22 | }{
23 | // TODO: Add test cases.
24 | {
25 | name: "Check with LICENSE file",
26 | args: args{filePath: "../../LICENSE"},
27 | want: "65d26fcc2f35ea6a181ac777e42db1ea",
28 | wantErr: false,
29 | },
30 | }
31 |
32 | for _, thisTest := range tests {
33 | t.Run(thisTest.name, func(t *testing.T) {
34 | t.Parallel()
35 |
36 | got, err := download.ComputeMD5Hash(thisTest.args.filePath)
37 | if err != nil != thisTest.wantErr {
38 | t.Errorf("hashFileMD5(%v) error = %v, wantErr %v", thisTest.args.filePath, err, thisTest.wantErr)
39 |
40 | return
41 | }
42 |
43 | if got != thisTest.want {
44 | t.Errorf("hashFileMD5() = %v, want %v", got, thisTest.want)
45 | }
46 | })
47 | }
48 | }
49 |
50 | func Benchmark_hashFileMD5_LICENSE(b *testing.B) {
51 | for range make([]struct{}, b.N) {
52 | if _, err := download.ComputeMD5Hash("../../LICENSE"); err != nil {
53 | b.Error(err.Error())
54 | }
55 | }
56 | }
57 |
58 | func Benchmark_controlHash_LICENSE(b *testing.B) {
59 | hash, _ := download.ComputeMD5Hash("../../LICENSE")
60 | hashfile := "/tmp/download-geofabrik-test.hash"
61 |
62 | if err := os.WriteFile(hashfile, []byte(hash), 0o600); err != nil {
63 | b.Errorf("Can't write file %s err: %v", hashfile, err)
64 | }
65 |
66 | for range make([]struct{}, b.N) {
67 | if _, err := download.CheckFileHash(hashfile, hash); err != nil {
68 | b.Error(err.Error())
69 | }
70 | }
71 | }
72 |
73 | func Test_controlHash(t *testing.T) {
74 | type args struct {
75 | hashfile string
76 | hash string
77 | }
78 |
79 | tests := []struct {
80 | args args
81 | name string
82 | fileToHash string
83 | want bool
84 | wantErr bool
85 | }{
86 | // TODO: Add test cases.
87 | {
88 | name: "Check with LICENSE file",
89 | fileToHash: "../../LICENSE",
90 | args: args{hashfile: "../../LICENSE.md5", hash: "65d26fcc2f35ea6a181ac777e42db1ea"},
91 | want: true,
92 | wantErr: false,
93 | },
94 | {
95 | name: "Check with LICENSE file wrong hash",
96 | fileToHash: "../../LICENSE",
97 | args: args{hashfile: "../../LICENSE.md5", hash: "65d26fcc2f35ea6a181ac777e42db1eb"},
98 | want: false,
99 | wantErr: false,
100 | },
101 | }
102 |
103 | for _, thisTest := range tests {
104 | hash, _ := download.ComputeMD5Hash(thisTest.fileToHash)
105 |
106 | hashfull := hash + " " + thisTest.fileToHash
107 |
108 | if err := os.WriteFile(thisTest.args.hashfile, []byte(hashfull), 0o600); err != nil {
109 | t.Errorf("can't write file %s err: %v", thisTest.args.hashfile, err)
110 | }
111 |
112 | t.Run(thisTest.name, func(t *testing.T) {
113 | got, err := download.CheckFileHash(thisTest.args.hashfile, thisTest.args.hash)
114 | if err != nil != thisTest.wantErr {
115 | 			if err != nil != thisTest.wantErr {
116 | 				t.Errorf("CheckFileHash() error = %v, wantErr %v", err, thisTest.wantErr)
116 |
117 | return
118 | }
119 |
120 | if got != thisTest.want {
121 | 				t.Errorf("CheckFileHash() = %v, want %v", got, thisTest.want)
122 | }
123 | })
124 | }
125 | }
126 |
127 | func Test_VerifyFileChecksum(t *testing.T) {
128 | t.Parallel()
129 |
130 | type args struct {
131 | outputPath string
132 | hashfile string
133 | }
134 |
135 | tests := []struct {
136 | name string
137 | args args
138 | want bool
139 | }{
140 | {
141 | name: "Check with LICENSE file",
142 | args: args{outputPath: "../../LICENSE", hashfile: "../../LICENSE.md5"},
143 | want: true,
144 | },
145 | }
146 | for _, tt := range tests {
147 | t.Run(tt.name, func(t *testing.T) {
148 | t.Parallel()
149 |
150 | if got := download.VerifyFileChecksum(tt.args.outputPath, tt.args.hashfile); got != tt.want {
151 | 				t.Errorf("VerifyFileChecksum() = %v, want %v", got, tt.want)
152 | }
153 | })
154 | }
155 | }
156 |
--------------------------------------------------------------------------------
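The tests above exercise `ComputeMD5Hash` and `CheckFileHash` against a sidecar file whose first token is the hex digest (the `<md5>  <path>` layout also used by LICENSE.md5). For reference, here is a minimal, standard-library-only sketch of that check; `verifyMD5Sidecar` is a hypothetical helper for illustration, not the package's implementation.

```go
package main

import (
	"crypto/md5"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"os"
	"strings"
)

// verifyMD5Sidecar compares a file against the hex digest stored as the
// first token of an "<md5>  <path>" sidecar file (hypothetical helper).
func verifyMD5Sidecar(dataPath, sidecarPath string) (bool, error) {
	raw, err := os.ReadFile(sidecarPath)
	if err != nil {
		return false, err
	}

	fields := strings.Fields(string(raw))
	if len(fields) == 0 {
		return false, errors.New("empty hash file")
	}

	f, err := os.Open(dataPath)
	if err != nil {
		return false, err
	}
	defer f.Close()

	h := md5.New()
	if _, err := io.Copy(h, f); err != nil {
		return false, err
	}

	return hex.EncodeToString(h.Sum(nil)) == fields[0], nil
}

func main() {
	ok, err := verifyMD5Sidecar("LICENSE", "LICENSE.md5")
	fmt.Println(ok, err)
}
```
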
/.goreleaser.yaml:
--------------------------------------------------------------------------------
1 | # This is an example .goreleaser.yml file with some sensible defaults.
2 | # Make sure to check the documentation at https://goreleaser.com
3 |
4 | # The lines below are called `modelines`. See `:help modeline`
5 | # Feel free to remove those if you don't want/need to use them.
6 | # yaml-language-server: $schema=https://goreleaser.com/static/schema.json
7 | # vim: set ts=2 sw=2 tw=0 fo=cnqoj
8 |
9 | version: 2
10 |
11 | before:
12 | hooks:
13 | # You may remove this if you don't use go modules.
14 | - go mod tidy
15 | # you may remove this if you don't need go generate
16 | - go generate ./...
17 | - go run cmd/download-geofabrik/main.go generate
18 | - go run cmd/download-geofabrik/main.go --service="openstreetmap.fr" generate
19 | - go run cmd/download-geofabrik/main.go --service="bbbike" generate
20 | - go run cmd/download-geofabrik/main.go --service="geo2day" generate
21 | - cat .README.md1 > README.md
22 | - go run cmd/download-geofabrik/main.go --help >> README.md
23 | - cat .README.md2 >> README.md
24 | - go run cmd/download-geofabrik/main.go list --markdown >> README.md
25 | - echo "" >> README.md
26 | - echo "## List of elements from openstreetmap.fr" >> README.md
27 | - go run cmd/download-geofabrik/main.go --service "openstreetmap.fr" list --markdown >> README.md
28 | - echo "" >> README.md
29 | - echo "## List of elements from bbbike.org" >> README.md
30 | - go run cmd/download-geofabrik/main.go --service "bbbike" list --markdown >> README.md
31 | - echo "" >> README.md
32 | - echo "## List of elements from geo2day" >> README.md
33 | - go run cmd/download-geofabrik/main.go --service "geo2day" list --markdown >> README.md
34 |
35 |
36 | builds:
37 | - main: ./cmd/download-geofabrik/main.go
38 | env:
39 | - CGO_ENABLED=0
40 | goos:
41 | - linux
42 | - windows
43 | - darwin
44 | - dragonfly
45 | - freebsd
46 | - illumos
47 | - netbsd
48 | - openbsd
49 | - solaris
50 |
51 | goarch:
52 | - amd64
53 | - 386
54 | - arm
55 | - arm64
56 | - mips
57 | - mips64
58 | - mips64le
59 | - mipsle
60 | - ppc64
61 | - ppc64le
62 | - s390x
63 |
64 | #goamd64:
65 | # - v1
66 | # - v2
67 | # - v3
68 | # - v4
69 |
70 | goarm:
71 | - 5
72 | - 6
73 | - 7
74 |
75 | ignore:
76 | - goos: darwin
77 | goarch: 386
78 |
79 | ldflags: >-
80 | -s -w
81 | -X main.version={{ .Version }}
82 |
83 | universal_binaries:
84 | - replace: true
85 |
86 | upx:
87 | - # Whether to enable it or not.
88 | #
89 | # Templates: allowed.
90 | enabled: false # remove upx since I've issues with it
91 |
92 | goos:
93 | - linux
94 | - windows
95 |
96 | # Compress argument.
97 | # Valid options are from '1' (faster) to '9' (better), and 'best'.
98 | compress: best
99 |
100 | # Whether to try LZMA (slower).
101 | lzma: true
102 |
103 | # Whether to try all methods and filters (slow).
104 | brute: true
105 |
106 | archives:
107 | - format: tar.gz
108 | # this name template makes the OS and Arch compatible with the results of `uname`.
109 | name_template: >-
110 | {{ .ProjectName }}_
111 | {{- title .Os }}_
112 | {{- if eq .Arch "amd64" }}x86_64
113 | {{- else if eq .Arch "386" }}i386
114 | {{- else }}{{ .Arch }}{{ end }}
115 | {{- if .Arm }}v{{ .Arm }}{{ end }}
116 | # use zip for windows archives
117 | format_overrides:
118 | - goos: windows
119 | format: zip
120 | files:
121 | - license*
122 | - LICENSE*
123 | - README.md
124 | - changelog*
125 | - CHANGELOG*
126 | - bbbike.yml
127 | - geofabrik.yml
128 | - openstreetmap.fr.yml
129 | - geo2day.yml
130 | checksum:
131 | name_template: 'checksums.txt'
132 | snapshot:
133 | version_template: "{{ .Tag }}-next"
134 |
135 | changelog:
136 | sort: asc
137 | filters:
138 | exclude:
139 | - "^docs:"
140 | - "^test:"
141 |
142 | dockers:
143 | - goos: linux
144 | goarch: amd64
145 | #binaries:
146 | # - download-geofabrik
147 | image_templates:
148 | - "juliennoblet/{{.ProjectName}}:latest"
149 | - "juliennoblet/{{.ProjectName}}:{{ .Tag }}"
150 | - "juliennoblet/{{.ProjectName}}:v{{ .Major }}"
151 | - "juliennoblet/{{.ProjectName}}:v{{ .Major }}.{{ .Minor }}"
152 | skip_push: auto
153 | dockerfile: Dockerfile
154 | extra_files:
155 | - geofabrik.yml
156 | - openstreetmap.fr.yml
157 | - bbbike.yml
158 | - geo2day.yml
--------------------------------------------------------------------------------
/internal/scrapper/bbbike/bbbike.go:
--------------------------------------------------------------------------------
1 | package bbbike
2 |
3 | import (
4 | "errors"
5 | "log/slog"
6 | "regexp"
7 |
8 | "github.com/gocolly/colly/v2"
9 | "github.com/julien-noblet/download-geofabrik/internal/element"
10 | "github.com/julien-noblet/download-geofabrik/internal/scrapper"
11 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
12 | )
13 |
14 | // Constants for magic numbers and URLs.
15 | const (
16 | progressBarCount = 237 // number of elements
17 | parallelism = 20 // number of parallel downloads
18 | prefixLength = 17 // length of "OSM extracts for "
19 | baseURL = "https://download.bbbike.org/osm/bbbike"
20 | startURL = baseURL + "/"
21 | )
22 |
23 | // Bbbike Scrapper.
24 | type Bbbike struct {
25 | *scrapper.Scrapper
26 | }
27 |
28 | // GetDefault returns the default configuration for Bbbike scrapper.
29 | func GetDefault() *Bbbike {
30 | urlFilters := []*regexp.Regexp{
31 | regexp.MustCompile(`https://download\.bbbike\.org/osm/bbbike/[A-Z].+$`),
32 | regexp.MustCompile(`https://download\.bbbike\.org/osm/bbbike/$`),
33 | }
34 |
35 | formatDefinition := formats.FormatDefinitions{
36 | formats.FormatCSV: {ID: formats.FormatCSV, Loc: ".osm.csv.xz", ToLoc: ".osm.csv.xz"},
37 | formats.FormatGarminOSM: {ID: formats.FormatGarminOSM, Loc: ".osm.garmin-osm.zip"},
38 | formats.FormatGarminOnroad: {ID: formats.FormatGarminOnroad, Loc: ".osm.garmin-onroad-latin1.zip"},
39 | formats.FormatGarminOntrail: {ID: formats.FormatGarminOntrail, Loc: ".osm.garmin-ontrail-latin1.zip"},
40 | formats.FormatGarminOpenTopo: {ID: formats.FormatGarminOpenTopo, Loc: ".osm.garmin-opentopo-latin1.zip"},
41 | formats.FormatGeoJSON: {ID: formats.FormatGeoJSON, Loc: ".osm.geojson.xz", ToLoc: ".geojson.xz"},
42 | formats.FormatMBTiles: {ID: formats.FormatMBTiles, Loc: ".osm.mbtiles-openmaptiles.zip", ToLoc: "osm.mbtiles-openmaptiles.zip"},
43 | formats.FormatMapsforge: {ID: formats.FormatMapsforge, Loc: ".osm.mapsforge-osm.zip"},
44 | formats.FormatOsmGz: {ID: formats.FormatOsmGz, Loc: ".osm.gz"},
45 | formats.FormatOsmPbf: {ID: formats.FormatOsmPbf, Loc: ".osm.pbf"},
46 | formats.FormatPoly: {ID: formats.FormatPoly, Loc: ".poly"},
47 | formats.FormatShpZip: {ID: formats.FormatShpZip, Loc: ".osm.shp.zip"},
48 | }
49 |
50 | return &Bbbike{
51 | Scrapper: &scrapper.Scrapper{
52 | PB: progressBarCount,
53 | Async: true,
54 | Parallelism: parallelism,
55 | MaxDepth: 0,
56 | AllowedDomains: []string{`download.bbbike.org`},
57 | BaseURL: baseURL,
58 | StartURL: startURL,
59 | URLFilters: urlFilters,
60 | FormatDefinition: formatDefinition,
61 | },
62 | }
63 | }
64 |
65 | // Collector represents Bbbike's scrapper.
66 | func (b *Bbbike) Collector() *colly.Collector {
67 | myCollector := b.Scrapper.Collector()
68 | myCollector.OnHTML("div.list tbody", func(e *colly.HTMLElement) {
69 | b.ParseList(e, myCollector)
70 | })
71 | myCollector.OnHTML("#sidebar", func(e *colly.HTMLElement) {
72 | b.ParseSidebar(e, myCollector)
73 | })
74 |
75 | return myCollector
76 | }
77 |
78 | // ParseList parses the list of elements from the HTML.
79 | func (b *Bbbike) ParseList(e *colly.HTMLElement, c *colly.Collector) {
80 | e.ForEach("a", func(_ int, el *colly.HTMLElement) {
81 | href := el.Request.AbsoluteURL(el.Attr("href"))
82 | slog.Debug("Parse", "href", href)
83 |
84 | if err := c.Visit(href); err != nil && !errors.Is(err, colly.ErrNoURLFiltersMatch) {
85 | slog.Error("Can't get url", "error", err)
86 | }
87 | })
88 | }
89 |
90 | // GetName extracts the name from the given string.
91 | func GetName(h3 string) string {
92 | return h3[prefixLength:] // remove "OSM extracts for "
93 | }
94 |
95 | // ParseSidebar parses the sidebar information from the HTML.
96 | func (b *Bbbike) ParseSidebar(e *colly.HTMLElement, _ *colly.Collector) {
97 | name := GetName(e.ChildText("h3"))
98 | myElement := element.Element{
99 | ID: name,
100 | Name: name,
101 | File: name + "/" + name,
102 | Parent: "",
103 | Formats: element.Formats{
104 | formats.FormatCSV,
105 | formats.FormatGarminOSM,
106 | formats.FormatGarminOnroad,
107 | formats.FormatGarminOntrail,
108 | formats.FormatGarminOpenTopo,
109 | formats.FormatGeoJSON,
110 | formats.FormatMBTiles,
111 | formats.FormatMapsforge,
112 | formats.FormatOsmGz,
113 | formats.FormatOsmPbf,
114 | formats.FormatPoly,
115 | formats.FormatShpZip,
116 | },
117 | Meta: false,
118 | }
119 |
120 | slog.Debug("Add", "name", name)
121 |
122 | if err := b.Config.MergeElement(&myElement); err != nil {
123 | slog.Error("Can't merge element", "name", myElement.Name, "error", err)
124 | }
125 | }
126 |
--------------------------------------------------------------------------------
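`GetName` above drops the first `prefixLength` (17) characters of the sidebar heading, which only holds as long as the heading keeps the exact prefix "OSM extracts for ". As a small illustrative alternative (not the package's code), the same trim can be written without the hard-coded length:

```go
package main

import (
	"fmt"
	"strings"
)

// extractsPrefix is the 17-character heading prefix the scrapper strips.
const extractsPrefix = "OSM extracts for "

// getName is a length-independent sketch of the trim above.
func getName(h3 string) string {
	return strings.TrimPrefix(h3, extractsPrefix)
}

func main() {
	fmt.Println(getName("OSM extracts for Berlin")) // Berlin
}
```
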
/pkg/formats/formats.go:
--------------------------------------------------------------------------------
1 | package formats
2 |
3 | import (
4 | "sort"
5 | "strings"
6 | )
7 |
8 | // Format represents a file format with various attributes.
9 | type Format struct {
10 | ID string `yaml:"ext"`
11 | Loc string `yaml:"loc"`
12 | BasePath string `yaml:"basepath,omitempty"`
13 | BaseURL string `yaml:"baseurl,omitempty"`
14 | ToLoc string `yaml:"toloc,omitempty"`
15 | 	Type     string `yaml:"type,omitempty"` // Optional; added to match newer config files, absent from the original definition.
16 | }
17 |
18 | // FormatDefinitions is a map of format definitions.
19 | type FormatDefinitions map[string]Format
20 |
21 | // MiniFormat represents a short and full name pair for a format.
22 | type MiniFormat struct {
23 | ShortName string
24 | FullName string
25 | }
26 |
27 | const (
28 | FormatState = "state"
29 | FormatOsmPbf = "osm.pbf"
30 | FormatOsmGz = "osm.gz"
31 | FormatOsmBz2 = "osm.bz2"
32 | FormatOshPbf = "osh.pbf"
33 | FormatPoly = "poly"
34 | FormatShpZip = "shp.zip"
35 | FormatKml = "kml"
36 | FormatGeoJSON = "geojson" // BBBike & OSM Today only
37 | FormatGarminOntrail = "osm.garmin-ontrail-latin1.zip" // BBBike only
38 | FormatGarminOnroad = "osm.garmin-onroad-latin1.zip" // BBBike only
39 | FormatGarminOpenTopo = "osm.garmin-opentopo-latin1.zip" // BBBike only
40 | FormatGarminOSM = "osm.garmin-osm.zip" // BBBike only
41 | FormatMapsforge = "osm.mapsforge-osm.zip" // BBBike only
42 | FormatMBTiles = "mbtiles"
43 | FormatCSV = "csv" // BBBike only
44 | )
45 |
46 | // Configuration keys.
47 | const (
48 | KeyOsmPbf = "dosmPbf"
49 | KeyOshPbf = "doshPbf"
50 | KeyOsmGz = "dosmGz"
51 | KeyOsmBz2 = "dosmBz2"
52 | KeyShpZip = "dshpZip"
53 | KeyState = "dstate"
54 | KeyPoly = "dpoly"
55 | KeyKml = "dkml"
56 | KeyGeoJSON = "dgeojson"
57 | KeyGarminOSM = "dgarmin"
58 | KeyMapsforge = "dmaps"
59 | KeyMBTiles = "dmbtiles"
60 | KeyCSV = "dcsv"
61 | KeyGarminOnroad = "dgarminonroad"
62 | KeyGarminOntrail = "dgarminontrail"
63 | KeyGarminOpenTopo = "dgarminopentopo"
64 | )
65 |
66 | // GetMiniFormats returns a string of short format names based on the provided full format names.
67 | func GetMiniFormats(fullFormatNames []string) string {
68 | miniFormats := []MiniFormat{
69 | {ShortName: "s", FullName: FormatState},
70 | {ShortName: "P", FullName: FormatOsmPbf},
71 | {ShortName: "G", FullName: FormatOsmGz},
72 | {ShortName: "B", FullName: FormatOsmBz2},
73 | {ShortName: "H", FullName: FormatOshPbf},
74 | {ShortName: "p", FullName: FormatPoly},
75 | {ShortName: "S", FullName: FormatShpZip},
76 | {ShortName: "k", FullName: FormatKml},
77 | {ShortName: "g", FullName: FormatGeoJSON},
78 | {ShortName: "t", FullName: FormatGarminOntrail},
79 | {ShortName: "r", FullName: FormatGarminOnroad},
80 | {ShortName: "o", FullName: FormatGarminOpenTopo},
81 | {ShortName: "O", FullName: FormatGarminOSM},
82 | {ShortName: "m", FullName: FormatMapsforge},
83 | {ShortName: "M", FullName: FormatMBTiles},
84 | {ShortName: "C", FullName: FormatCSV},
85 | }
86 |
87 | shortNames := make([]string, 0, len(fullFormatNames))
88 |
89 | for _, fullName := range fullFormatNames {
90 | for _, format := range miniFormats {
91 | if fullName == format.FullName {
92 | shortNames = append(shortNames, format.ShortName)
93 |
94 | break
95 | }
96 | }
97 | }
98 |
99 | return strings.Join(shortNames, "")
100 | }
101 |
102 | // GetFormats returns a slice of format strings based on the configuration map.
103 | // The config map should contain keys like KeyOsmPbf with boolean true/false.
104 | func GetFormats(config map[string]bool) []string {
105 | options := map[string]string{
106 | KeyOsmPbf: FormatOsmPbf,
107 | KeyOshPbf: FormatOshPbf,
108 | KeyOsmGz: FormatOsmGz,
109 | KeyOsmBz2: FormatOsmBz2,
110 | KeyShpZip: FormatShpZip,
111 | KeyState: FormatState,
112 | KeyPoly: FormatPoly,
113 | KeyKml: FormatKml,
114 | KeyGeoJSON: FormatGeoJSON,
115 | KeyGarminOSM: FormatGarminOSM,
116 | KeyMapsforge: FormatMapsforge,
117 | KeyMBTiles: FormatMBTiles,
118 | KeyCSV: FormatCSV,
119 | KeyGarminOnroad: FormatGarminOnroad,
120 | KeyGarminOntrail: FormatGarminOntrail,
121 | KeyGarminOpenTopo: FormatGarminOpenTopo,
122 | }
123 |
124 | var formatList []string
125 |
126 | for key, format := range options {
127 | if enabled, ok := config[key]; ok && enabled {
128 | formatList = append(formatList, format)
129 | }
130 | }
131 |
132 | if len(formatList) == 0 {
133 | formatList = append(formatList, FormatOsmPbf)
134 | }
135 |
136 | sort.Strings(formatList)
137 |
138 | return formatList
139 | }
140 |
--------------------------------------------------------------------------------
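A short usage sketch for the helpers above: `GetFormats` turns flag keys into format names (falling back to `osm.pbf` and sorting the result), and `GetMiniFormats` compresses a format list into the one-letter codes shown in listings. The expected outputs in the comments are derived from the code above.

```go
package main

import (
	"fmt"

	"github.com/julien-noblet/download-geofabrik/pkg/formats"
)

func main() {
	// No flags set: GetFormats falls back to osm.pbf.
	fmt.Println(formats.GetFormats(map[string]bool{})) // [osm.pbf]

	// Two flags set: the result is sorted alphabetically.
	list := formats.GetFormats(map[string]bool{
		formats.KeyPoly:  true,
		formats.KeyState: true,
	})
	fmt.Println(list) // [poly state]

	// One-letter codes, emitted in the order of the input slice.
	fmt.Println(formats.GetMiniFormats(list)) // ps
}
```
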
/internal/generator/generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "log/slog"
7 | "os"
8 | "path/filepath"
9 | "slices"
10 |
11 | pb "github.com/cheggaaa/pb/v3"
12 | "github.com/gocolly/colly/v2"
13 | "github.com/julien-noblet/download-geofabrik/internal/config"
14 | "github.com/julien-noblet/download-geofabrik/internal/generator/importer/geofabrik"
15 | "github.com/julien-noblet/download-geofabrik/internal/scrapper"
16 | "github.com/julien-noblet/download-geofabrik/internal/scrapper/bbbike"
17 | "github.com/julien-noblet/download-geofabrik/internal/scrapper/geo2day"
18 | geofabrikScrapper "github.com/julien-noblet/download-geofabrik/internal/scrapper/geofabrik"
19 | "github.com/julien-noblet/download-geofabrik/internal/scrapper/openstreetmapfr"
20 | )
21 |
22 | const (
23 | filePermission = 0o600
24 | ServiceGeofabrik = "geofabrik"
25 | ServiceGeofabrikParse = "geofabrik-parse"
26 | ServiceOpenStreetMapFR = "openstreetmap.fr"
27 | ServiceGeo2Day = "geo2day"
28 | ServiceBBBike = "bbbike"
29 | )
30 |
31 | var ErrUnknownService = errors.New("unknown service")
32 |
33 | // Write writes the generated configuration to a file.
34 | func Write(c *config.Config, filename string) error {
35 | out, err := c.Generate()
36 | if err != nil {
37 | return fmt.Errorf("failed to generate config: %w", err)
38 | }
39 |
40 | filename, err = filepath.Abs(filename)
41 | if err != nil {
42 | return fmt.Errorf("failed to get absolute path for filename: %w", err)
43 | }
44 |
45 | if err := os.WriteFile(filename, out, filePermission); err != nil {
46 | return fmt.Errorf("failed to write file: %w", err)
47 | }
48 |
49 | slog.Info("Generated config file", "file", filename)
50 |
51 | return nil
52 | }
53 |
54 | // Generate generates the configuration based on the specified service.
55 | func Generate(service string, progress bool, configfile string) error {
56 | // The original `Generate` function used a map of handlers.
57 | // With the refactoring, `PerformGenerate` now encapsulates the logic
58 | // for all services, including the distinction between Geofabrik and
59 | // the scrapper-based services.
60 | // Therefore, `Generate` can directly call `PerformGenerate`.
61 | return PerformGenerate(service, progress, configfile)
62 | }
63 |
64 | // PerformGenerate handles the generation logic for all services.
65 | func PerformGenerate(service string, progress bool, configfile string) error {
66 | var myScrapper scrapper.IScrapper
67 |
68 | switch service {
69 | case ServiceGeofabrik:
70 | return handleGeofabrik(configfile, progress)
71 | case ServiceGeofabrikParse:
72 | myScrapper = geofabrikScrapper.GetDefault()
73 | case ServiceOpenStreetMapFR:
74 | myScrapper = openstreetmapfr.GetDefault()
75 | case ServiceGeo2Day:
76 | myScrapper = geo2day.GetDefault()
77 | case ServiceBBBike:
78 | myScrapper = bbbike.GetDefault()
79 | default:
80 | return fmt.Errorf("%w: %s", ErrUnknownService, service)
81 | }
82 |
83 | if progress {
84 | handleProgress(myScrapper)
85 | } else {
86 | collector := myScrapper.Collector()
87 | visitAndWait(collector, myScrapper.GetStartURL())
88 | }
89 |
90 | myconfig := myScrapper.GetConfig()
91 | Cleanup(myconfig)
92 |
93 | if err := Write(myconfig, configfile); err != nil {
94 | slog.Error("Failed to write config", "error", err)
95 |
96 | return err
97 | }
98 |
99 | return nil
100 | }
101 |
102 | // handleGeofabrik handles the Geofabrik service.
103 | func handleGeofabrik(configfile string, _ bool) error {
104 | index, err := geofabrik.GetIndex(geofabrik.GeofabrikIndexURL)
105 | if err != nil {
106 | slog.Error("Failed to get geofabrik index", "error", err)
107 |
108 | return fmt.Errorf("failed to get index: %w", err)
109 | }
110 |
111 | myConfig, err := geofabrik.Convert(index)
112 | if err != nil {
113 | slog.Error("Failed to convert geofabrik index", "error", err)
114 |
115 | return fmt.Errorf("failed to convert index: %w", err)
116 | }
117 |
118 | Cleanup(myConfig)
119 |
120 | if err := Write(myConfig, configfile); err != nil {
121 | slog.Error("Failed to write config", "error", err)
122 |
123 | return err
124 | }
125 |
126 | return nil
127 | }
128 |
129 | // handleProgress handles the progress bar for the scrapper.
130 | func handleProgress(myScrapper scrapper.IScrapper) {
131 | bar := pb.New(myScrapper.GetPB())
132 | bar.Start()
133 |
134 | defer bar.Finish()
135 |
136 | collector := myScrapper.Collector()
137 | collector.OnScraped(func(*colly.Response) {
138 | bar.Increment()
139 | })
140 | visitAndWait(collector, myScrapper.GetStartURL())
141 | }
142 |
143 | // visitAndWait visits the URL and waits for the collector to finish.
144 | func visitAndWait(collector *colly.Collector, url string) {
145 | if err := collector.Visit(url); err != nil {
146 | slog.Error("Can't get url", "error", err)
147 |
148 | return
149 | }
150 |
151 | collector.Wait()
152 | }
153 |
154 | // Cleanup sorts the formats in the configuration elements.
155 | func Cleanup(c *config.Config) {
156 | for _, elem := range c.Elements {
157 | slices.Sort(elem.Formats)
158 | }
159 | }
160 |
--------------------------------------------------------------------------------
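`PerformGenerate` above dispatches on the service name: `geofabrik` goes through the JSON index importer, the other services crawl HTML with colly, and both paths end in `Write`. A minimal sketch of calling it from code inside the module (the package is internal, so this only compiles there); the output file names are illustrative.

```go
package main

import (
	"log/slog"

	"github.com/julien-noblet/download-geofabrik/internal/generator"
)

func main() {
	// Geofabrik: imported from the JSON index, no crawling involved.
	if err := generator.Generate(generator.ServiceGeofabrik, false, "geofabrik.yml"); err != nil {
		slog.Error("generation failed", "service", generator.ServiceGeofabrik, "error", err)
	}

	// BBBike: crawled with colly, with the progress bar enabled.
	if err := generator.Generate(generator.ServiceBBBike, true, "bbbike.yml"); err != nil {
		slog.Error("generation failed", "service", generator.ServiceBBBike, "error", err)
	}
}
```
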
/internal/config/config_test.go:
--------------------------------------------------------------------------------
1 | package config_test
2 |
3 | import (
4 | "os"
5 | "path/filepath"
6 | "sync"
7 | "testing"
8 |
9 | "github.com/julien-noblet/download-geofabrik/internal/config"
10 | "github.com/julien-noblet/download-geofabrik/internal/element"
11 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
12 | "github.com/stretchr/testify/assert"
13 | "github.com/stretchr/testify/require"
14 | )
15 |
16 | func TestElem2preURL_WithParent(t *testing.T) {
17 | cfg := &config.Config{
18 | BaseURL: "https://example.com",
19 | Elements: element.MapElement{
20 | "parent": {
21 | ID: "parent",
22 | Name: "Parent Element",
23 | },
24 | "child": {
25 | ID: "child",
26 | Name: "Child Element",
27 | Parent: "parent",
28 | },
29 | },
30 | Formats: formats.FormatDefinitions{},
31 | ElementsMutex: &sync.RWMutex{},
32 | }
33 |
34 | childElem, err := cfg.GetElement("child")
35 | require.NoError(t, err)
36 |
37 | url, err := config.Elem2preURL(cfg, childElem)
38 | require.NoError(t, err)
39 |
40 | expectedURL := "https://example.com/parent/child"
41 | assert.Equal(t, expectedURL, url)
42 | }
43 |
44 | func TestElem2preURL_NoParent_WithBaseURLArgs(t *testing.T) {
45 | cfg := &config.Config{
46 | BaseURL: "https://example.com",
47 | Elements: element.MapElement{
48 | "item": {ID: "item"},
49 | },
50 | ElementsMutex: &sync.RWMutex{},
51 | }
52 | elem, err := cfg.GetElement("item")
53 | require.NoError(t, err)
54 |
55 | // Case 1: 1 arg
56 | url, err := config.Elem2preURL(cfg, elem, "custom")
57 | require.NoError(t, err)
58 | assert.Equal(t, "https://example.com/custom/item", url)
59 |
60 | // Case 2: 2 args
61 | url, err = config.Elem2preURL(cfg, elem, "http://other.com", "path")
62 | require.NoError(t, err)
63 | assert.Equal(t, "http://other.com/path/item", url)
64 |
65 | // Case default: 0 args
66 | url, err = config.Elem2preURL(cfg, elem)
67 | require.NoError(t, err)
68 | assert.Equal(t, "https://example.com/item", url)
69 | }
70 |
71 | func TestGenerate(t *testing.T) {
72 | cfg := &config.Config{
73 | BaseURL: "https://example.com",
74 | Elements: element.MapElement{
75 | "one": {ID: "one"},
76 | },
77 | }
78 | data, err := cfg.Generate()
79 | require.NoError(t, err)
80 | assert.Contains(t, string(data), "https://example.com")
81 | assert.Contains(t, string(data), "one")
82 | }
83 |
84 | func TestMergeElement(t *testing.T) {
85 | cfg := &config.Config{
86 | Elements: make(element.MapElement),
87 | ElementsMutex: &sync.RWMutex{},
88 | }
89 |
90 | el1 := &element.Element{ID: "e1", Parent: "p1", Formats: []string{"osm"}}
91 | err := cfg.MergeElement(el1)
92 | require.NoError(t, err)
93 | assert.True(t, cfg.Exist("e1"))
94 |
95 | // Merge update checks
96 | el2 := &element.Element{ID: "e1", Parent: "p1", Formats: []string{"pbf"}}
97 | err = cfg.MergeElement(el2)
98 | require.NoError(t, err)
99 |
100 | e, err := cfg.GetElement("e1")
101 | require.NoError(t, err)
102 | assert.Contains(t, e.Formats, "osm")
103 | assert.Contains(t, e.Formats, "pbf")
104 |
105 | // Parent mismatch
106 | elBad := &element.Element{ID: "e1", Parent: "p2"}
107 | err = cfg.MergeElement(elBad)
108 | assert.ErrorIs(t, err, config.ErrParentMismatch)
109 | }
110 |
111 | func TestAddExtension(t *testing.T) {
112 | cfg := &config.Config{
113 | Elements: element.MapElement{
114 | "e1": {ID: "e1", Formats: []string{"osm"}},
115 | },
116 | ElementsMutex: &sync.RWMutex{},
117 | }
118 |
119 | cfg.AddExtension("e1", "pbf")
120 | e, _ := cfg.GetElement("e1")
121 | assert.Contains(t, e.Formats, "pbf")
122 | }
123 |
124 | func TestElem2URL(t *testing.T) {
125 | cfg := &config.Config{
126 | BaseURL: "https://example.com",
127 | Formats: formats.FormatDefinitions{
128 | "osm.pbf": {Loc: "-latest.osm.pbf"},
129 | },
130 | Elements: element.MapElement{
131 | "e1": {ID: "e1", Formats: []string{"osm.pbf"}},
132 | },
133 | ElementsMutex: &sync.RWMutex{},
134 | }
135 |
136 | e, _ := cfg.GetElement("e1")
137 | url, err := config.Elem2URL(cfg, e, "osm.pbf")
138 | require.NoError(t, err)
139 | assert.Equal(t, "https://example.com/e1-latest.osm.pbf", url)
140 |
141 | _, err = config.Elem2URL(cfg, e, "missing")
142 | assert.ErrorIs(t, err, config.ErrFormatNotExist)
143 | }
144 |
145 | func TestLoadConfig(t *testing.T) {
146 | content := `
147 | baseURL: https://test.com
148 | elements:
149 | e1:
150 | id: e1
151 | `
152 | tmpDir := t.TempDir()
153 | f := filepath.Join(tmpDir, "config.yml")
154 | err := os.WriteFile(f, []byte(content), 0o600)
155 | require.NoError(t, err)
156 |
157 | cfg, err := config.LoadConfig(f)
158 | require.NoError(t, err)
159 | assert.Equal(t, "https://test.com", cfg.BaseURL)
160 | assert.True(t, cfg.Exist("e1"))
161 | }
162 |
163 | func TestIsHashable(t *testing.T) {
164 | cfg := &config.Config{
165 | Formats: formats.FormatDefinitions{
166 | "osm.pbf": {},
167 | "osm.pbf.md5": {},
168 | },
169 | }
170 | isH, hash, ext := config.IsHashable(cfg, "osm.pbf")
171 | assert.True(t, isH)
172 | assert.Equal(t, "osm.pbf.md5", hash)
173 | assert.Equal(t, "md5", ext)
174 |
175 | isH, _, _ = config.IsHashable(cfg, "other")
176 | assert.False(t, isH)
177 | }
178 |
--------------------------------------------------------------------------------
/pkg/formats/formats_test.go:
--------------------------------------------------------------------------------
1 | package formats_test
2 |
3 | import (
4 | "reflect"
5 | "testing"
6 |
7 | "github.com/julien-noblet/download-geofabrik/internal/config"
8 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
9 | )
10 |
11 | func Benchmark_miniFormats_parse_geofabrik_yml(b *testing.B) {
12 | 	// parse geofabrik.yml once, then run GetMiniFormats over every element b.N times
13 | c, _ := config.LoadConfig("../geofabrik.yml")
14 |
15 | for range make([]struct{}, b.N) {
16 | for _, v := range c.Elements {
17 | formats.GetMiniFormats(v.Formats)
18 | }
19 | }
20 | }
21 |
22 | func Test_GetMiniFormats(t *testing.T) {
23 | t.Parallel()
24 |
25 | type args struct {
26 | s []string
27 | }
28 |
29 | tests := []struct {
30 | name string
31 | want string
32 | args args
33 | }{
34 | // TODO: Add test cases.
35 | {name: "No Formats", args: args{s: []string(nil)}, want: ""},
36 | {name: "state only", args: args{s: []string{formats.FormatState}}, want: "s"},
37 | {name: "osm.pbf only", args: args{s: []string{formats.FormatOsmPbf}}, want: "P"},
38 | {name: "osm.bz2 only", args: args{s: []string{formats.FormatOsmBz2}}, want: "B"},
39 | {name: "osm.gz only", args: args{s: []string{formats.FormatOsmGz}}, want: "G"},
40 | {name: "osh.pbf only", args: args{s: []string{formats.FormatOshPbf}}, want: "H"},
41 | {name: "poly only", args: args{s: []string{formats.FormatPoly}}, want: "p"},
42 | {name: "shp.zip only", args: args{s: []string{formats.FormatShpZip}}, want: "S"},
43 | {name: "kml only", args: args{s: []string{formats.FormatKml}}, want: "k"},
44 | {name: "state and osm.pbf", args: args{s: []string{formats.FormatOsmPbf, formats.FormatState}}, want: "Ps"},
45 | {name: "state and osm.bz2", args: args{s: []string{formats.FormatState, formats.FormatOsmBz2}}, want: "sB"},
46 | {name: "state and osh.pbf", args: args{s: []string{formats.FormatOshPbf, formats.FormatState}}, want: "Hs"},
49 | {name: "state and poly", args: args{s: []string{formats.FormatState, formats.FormatPoly}}, want: "sp"},
50 | {name: "state and shp.zip", args: args{s: []string{formats.FormatState, formats.FormatShpZip}}, want: "sS"},
51 | {name: "state and kml", args: args{s: []string{formats.FormatState, formats.FormatKml}}, want: "sk"},
52 | {name: "state and geojson", args: args{s: []string{formats.FormatState, formats.FormatGeoJSON}}, want: "sg"},
53 | {name: "osm.pbf and geojson", args: args{s: []string{formats.FormatOsmPbf, formats.FormatGeoJSON}}, want: "Pg"},
54 | 		// Not testing all combinations!
55 | {name: "osm.pbf and shp.zip", args: args{s: []string{formats.FormatOsmPbf, formats.FormatShpZip}}, want: "PS"},
56 | // With all
57 | {
58 | name: "All formats",
59 | args: args{s: []string{
60 | formats.FormatState,
61 | formats.FormatOsmBz2,
62 | formats.FormatOsmPbf,
63 | "osh.pbh",
64 | formats.FormatPoly,
65 | formats.FormatKml,
66 | formats.FormatShpZip,
67 | }},
68 | want: "sBPpkS",
69 | },
70 | }
71 |
72 | for _, tt := range tests {
73 | t.Run(tt.name, func(t *testing.T) {
74 | t.Parallel()
75 |
76 | if got := formats.GetMiniFormats(tt.args.s); got != tt.want {
77 | t.Errorf("formats.MiniFormats() = %v, want %v", got, tt.want)
78 | }
79 | })
80 | }
81 | }
82 |
83 | func Test_getFormats(t *testing.T) {
84 | t.Parallel()
85 |
86 | tests := []struct {
87 | flags map[string]bool
88 | name string
89 | want []string
90 | }{
91 | {
92 | name: "none",
93 | flags: map[string]bool{},
94 | want: []string{formats.FormatOsmPbf},
95 | },
96 | {
97 | name: "dosmPbf",
98 | flags: map[string]bool{formats.KeyOsmPbf: true},
99 | want: []string{formats.FormatOsmPbf},
100 | },
101 | {
102 | name: "doshPbf",
103 | flags: map[string]bool{formats.KeyOshPbf: true},
104 | want: []string{formats.FormatOshPbf},
105 | },
106 | {
107 | name: "dosmPbf doshPbf",
108 | flags: map[string]bool{
109 | formats.KeyOsmPbf: true,
110 | formats.KeyOshPbf: true,
111 | },
112 | want: []string{formats.FormatOshPbf, formats.FormatOsmPbf},
113 | },
114 | {
115 | name: "dosmBz2 dshpZip",
116 | flags: map[string]bool{
117 | formats.KeyOsmBz2: true,
118 | formats.KeyShpZip: true,
119 | },
120 | want: []string{formats.FormatOsmBz2, formats.FormatShpZip},
121 | },
122 | {
123 | name: "dstate dpoly",
124 | flags: map[string]bool{
125 | formats.KeyState: true,
126 | formats.KeyPoly: true,
127 | },
128 | want: []string{formats.FormatPoly, formats.FormatState},
129 | },
130 | {
131 | name: "dkml",
132 | flags: map[string]bool{formats.KeyKml: true},
133 | want: []string{formats.FormatKml},
134 | },
135 | {
136 | name: "dosmGz",
137 | flags: map[string]bool{formats.KeyOsmGz: true},
138 | want: []string{formats.FormatOsmGz},
139 | },
140 | {
141 | name: "dgeojson",
142 | flags: map[string]bool{formats.KeyGeoJSON: true},
143 | want: []string{formats.FormatGeoJSON},
144 | },
145 | }
146 |
147 | for _, thisTest := range tests {
148 | t.Run(thisTest.name, func(t *testing.T) {
149 | t.Parallel()
150 |
151 | if got := formats.GetFormats(thisTest.flags); !reflect.DeepEqual(got, thisTest.want) {
152 | t.Errorf("formats.GetFormats() = %v, want %v", got, thisTest.want)
153 | }
154 | })
155 | }
156 | }
157 |
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | version: "2"
2 | linters:
3 | enable:
4 | - asasalint
5 | - asciicheck
6 | - bidichk
7 | - bodyclose
8 | - canonicalheader
9 | - containedctx
10 | - contextcheck
11 | - copyloopvar
12 | - cyclop
13 | - decorder
14 | - depguard
15 | - dogsled
16 | - dupl
17 | - dupword
18 | - durationcheck
19 | - err113
20 | - errchkjson
21 | - errname
22 | - errorlint
23 | - exhaustive
24 | - exptostd
25 | - fatcontext
26 | - forbidigo
27 | - forcetypeassert
28 | - funlen
29 | - ginkgolinter
30 | - gocheckcompilerdirectives
31 | - gochecknoinits
32 | - gochecksumtype
33 | - gocognit
34 | - goconst
35 | - gocritic
36 | - gocyclo
37 | - godot
38 | - godox
39 | - goheader
40 | - gomoddirectives
41 | - gomodguard
42 | - goprintffuncname
43 | - gosec
44 | - gosmopolitan
45 | - grouper
46 | - iface
47 | - importas
48 | - inamedparam
49 | - interfacebloat
50 | - intrange
51 | - ireturn
52 | - lll
53 | - loggercheck
54 | - maintidx
55 | - makezero
56 | - mirror
57 | - misspell
58 | - mnd
59 | - musttag
60 | - nakedret
61 | - nestif
62 | - nilerr
63 | - nilnesserr
64 | - nilnil
65 | - nlreturn
66 | - noctx
67 | - nolintlint
68 | - nosprintfhostport
69 | - paralleltest
70 | - perfsprint
71 | - prealloc
72 | - predeclared
73 | - promlinter
74 | - protogetter
75 | - reassign
76 | - recvcheck
77 | - revive
78 | - rowserrcheck
79 | - sloglint
80 | - spancheck
81 | - sqlclosecheck
82 | - staticcheck
83 | - tagalign
84 | - tagliatelle
85 | - testableexamples
86 | - testifylint
87 | - testpackage
88 | - thelper
89 | - tparallel
90 | - unconvert
91 | - unparam
92 | - usestdlibvars
93 | - usetesting
94 | - varnamelen
95 | - wastedassign
96 | - whitespace
97 | - wrapcheck
98 | - wsl_v5
99 | - zerologlint
100 | disable:
101 | - exhaustruct
102 | - gochecknoglobals
103 | - nonamedreturns
104 | settings:
105 | depguard:
106 | rules:
107 | main:
108 | list-mode: lax
109 | allow:
110 | - github.com/apex/log
111 |             - github.com/julien-noblet/download-geofabrik/*
112 | dupl:
113 | threshold: 150
114 | funlen:
115 | lines: 100
116 | statements: 50
117 | goconst:
118 | min-len: 2
119 | min-occurrences: 3
120 | gocritic:
121 | enable-all: true
122 | gocyclo:
123 | min-complexity: 20
124 | govet:
125 | enable:
126 | - appends
127 | - asmdecl
128 | - assign
129 | - atomic
130 | - atomicalign
131 | - bools
132 | - buildtag
133 | - cgocall
134 | - composites
135 | - copylocks
136 | - deepequalerrors
137 | - defers
138 | - directive
139 | - errorsas
140 | - fieldalignment
141 | - findcall
142 | - framepointer
143 | - httpresponse
144 | - ifaceassert
145 | - loopclosure
146 | - lostcancel
147 | - nilfunc
148 | - nilness
149 | - printf
150 | - reflectvaluecompare
151 | - shadow
152 | - shift
153 | - sigchanyzer
154 | - slog
155 | - sortslice
156 | - stdmethods
157 | - stdversion
158 | - stringintconv
159 | - structtag
160 | - testinggoroutine
161 | - tests
162 | - timeformat
163 | - unmarshal
164 | - unreachable
165 | - unsafeptr
166 | - unusedresult
167 | - unusedwrite
168 | - waitgroup
169 | settings:
170 | printf:
171 | funcs:
172 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof
173 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf
174 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf
175 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf
176 | lll:
177 | line-length: 150
178 | misspell:
179 | locale: US
180 | nakedret:
181 | max-func-lines: 30
182 | prealloc:
183 | simple: true
184 | range-loops: true
185 | for-loops: true
186 | unparam:
187 | check-exported: false
188 | unused:
189 | exported-fields-are-used: false
190 | whitespace:
191 | multi-if: false
192 | multi-func: true
193 | wsl_v5:
194 | allow-first-in-block: true
195 | allow-whole-block: false
196 | branch-max-lines: 2
197 | case-max-lines: 2
198 | exclusions:
199 | generated: lax
200 | presets:
201 | - comments
202 | - common-false-positives
203 | - legacy
204 | - std-error-handling
205 | rules:
206 | - linters:
207 | - cyclop
208 | - dupl
209 | - exhaustruct
210 | - funlen
211 | - gochecknoinits
212 | - gocognit
213 | - gocyclo
214 | - godox
215 | - maintidx
216 | - paralleltest
217 | - varnamelen
218 | path: _test.go
219 | paths:
220 | - third_party$
221 | - builtin$
222 | - examples$
223 | formatters:
224 | enable:
225 | - gci
226 | - gofmt
227 | - gofumpt
228 | - goimports
229 | settings:
230 | goimports:
231 | local-prefixes:
232 | - github.com/golangci/golangci-lint
233 | exclusions:
234 | generated: lax
235 | paths:
236 | - third_party$
237 | - builtin$
238 | - examples$
239 |
--------------------------------------------------------------------------------
/internal/downloader/download_test.go:
--------------------------------------------------------------------------------
1 | package download_test
2 |
3 | import (
4 | "context"
5 | "os"
6 | "testing"
7 |
8 | "github.com/julien-noblet/download-geofabrik/internal/config"
9 | download "github.com/julien-noblet/download-geofabrik/internal/downloader"
10 | "github.com/julien-noblet/download-geofabrik/internal/element"
11 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
12 | )
13 |
14 | func Test_DownloadFromURL(t *testing.T) {
15 | t.Parallel()
16 |
17 | type args struct {
18 | myURL string
19 | fileName string
20 | }
21 |
22 | tests := []struct {
23 | name string
24 | args args
25 | fNodownload bool
26 | fQuiet bool
27 | fProgress bool
28 | wantErr bool
29 | }{
30 | {
31 | name: "try fNodownload=true",
32 | args: args{
33 | myURL: "https://download.geofabrik.de/this_url_should_not_exist",
34 | fileName: "/tmp/download-geofabrik.test",
35 | },
36 | fNodownload: true,
37 | wantErr: false,
38 | },
39 | {
40 | name: "404 error from geofabrik",
41 | fNodownload: false,
42 | args: args{
43 | myURL: "https://download.geofabrik.de/this_url_should_not_exist",
44 | fileName: "/tmp/download-geofabrik.test",
45 | },
46 | wantErr: true,
47 | },
48 | {
49 | name: "OK download from geofabrik",
50 | fNodownload: false,
51 | fQuiet: false,
52 | fProgress: true,
53 | args: args{
54 | myURL: "https://download.geofabrik.de/europe/andorra.poly",
55 | fileName: "/tmp/download-geofabrik.test",
56 | },
57 | wantErr: false,
58 | },
59 | }
60 |
61 | for _, thisTest := range tests {
62 | t.Run(thisTest.name, func(t *testing.T) {
63 | t.Parallel()
64 |
65 | opts := &config.Options{
66 | NoDownload: thisTest.fNodownload,
67 | Quiet: thisTest.fQuiet,
68 | Progress: thisTest.fProgress,
69 | OutputDirectory: "/tmp/",
70 | FormatFlags: make(map[string]bool),
71 | }
72 | cfg := &config.Config{} // Empty config for FromURL
73 |
74 | d := download.NewDownloader(cfg, opts)
75 |
76 | if err := d.FromURL(context.Background(), thisTest.args.myURL, thisTest.args.fileName); (err != nil) != thisTest.wantErr {
77 | t.Errorf("Downloader.FromURL() error = %v, wantErr %v", err, thisTest.wantErr)
78 | }
79 | })
80 | }
81 | }
82 |
83 | func TestFile(t *testing.T) {
84 | t.Parallel()
85 |
86 | type args struct {
87 | configPtr *config.Config
88 | element string
89 | format string
90 | output string
91 | }
92 |
93 | tests := []struct {
94 | name string
95 | args args
96 | wantErr bool
97 | }{
98 | {
99 | name: "TestFile",
100 | args: args{
101 | configPtr: &config.Config{
102 | Formats: formats.FormatDefinitions{
103 | formats.FormatPoly: {ID: formats.FormatPoly, Loc: ".poly", ToLoc: "", BasePath: "polygons/", BaseURL: ""},
104 | },
105 | Elements: element.MapElement{
106 | "africa": element.Element{ID: "africa", Name: "Africa", Formats: []string{formats.FormatPoly}},
107 | },
108 | BaseURL: `https://download.openstreetmap.fr/`,
109 | },
110 | element: "africa",
111 | format: formats.FormatPoly,
112 | output: "/tmp/download-geofabrik.test",
113 | },
114 | wantErr: false,
115 | },
116 | }
117 |
118 | for _, tt := range tests {
119 | t.Run(tt.name, func(t *testing.T) {
120 | t.Parallel()
121 |
122 | opts := &config.Options{
123 | Verbose: true,
124 | OutputDirectory: "/tmp/",
125 | FormatFlags: make(map[string]bool),
126 | }
127 |
128 | d := download.NewDownloader(tt.args.configPtr, opts)
129 |
130 | err := d.DownloadFile(context.Background(), tt.args.element, tt.args.format, tt.args.output)
131 | if (err != nil) != tt.wantErr {
132 | t.Errorf("DownloadFile() error = %v, wantErr %v", err, tt.wantErr)
133 | }
134 | })
135 | }
136 | }
137 |
138 | func TestChecksum(t *testing.T) {
139 | t.Parallel()
140 |
141 | cfg := &config.Config{
142 | Formats: formats.FormatDefinitions{
143 | formats.FormatOsmPbf: {ID: formats.FormatOsmPbf, Loc: "-latest.osm.pbf"},
144 | "osm.pbf.md5": {ID: "osm.pbf.md5", Loc: "-latest.osm.pbf.md5"},
145 | formats.FormatPoly: {ID: formats.FormatPoly, Loc: ".poly"},
146 | },
147 | Elements: element.MapElement{
148 | "monaco": element.Element{ID: "monaco", Name: "Monaco", Formats: []string{formats.FormatOsmPbf, "osm.pbf.md5", formats.FormatPoly}},
149 | },
150 | BaseURL: "https://download.geofabrik.de/europe",
151 | }
152 |
153 | opts := &config.Options{
154 | Check: true,
155 | OutputDirectory: "/tmp/",
156 | FormatFlags: make(map[string]bool),
157 | }
158 |
159 | d := download.NewDownloader(cfg, opts)
160 |
161 | // Download monaco first
162 | err := d.DownloadFile(context.Background(), "monaco", formats.FormatOsmPbf, "/tmp/monaco.osm.pbf")
163 | if err != nil {
164 | t.Fatalf("Failed setup download: %v", err)
165 | }
166 |
167 | t.Cleanup(func() { _ = os.Remove("/tmp/monaco.osm.pbf") })
168 | t.Cleanup(func() { _ = os.Remove("/tmp/monaco.osm.pbf.md5") })
169 |
170 | // Test Checksum
171 | tests := []struct {
172 | name string
173 | format string
174 | check bool
175 | want bool
176 | }{
177 | {"No Check Poly", formats.FormatPoly, false, false},
178 | {"Check Poly (no MD5 def)", formats.FormatPoly, true, false},
179 | {"Check PBF", formats.FormatOsmPbf, true, true},
180 | }
181 |
182 | for _, tt := range tests {
183 | t.Run(tt.name, func(t *testing.T) {
184 | 			// Not parallel: these subtests mutate the shared downloader's Options.Check.
185 |
186 | d.Options.Check = tt.check
187 |
188 | got := d.Checksum(context.Background(), "monaco", tt.format)
189 | if got != tt.want {
190 | t.Errorf("Checksum() = %v, want %v", got, tt.want)
191 | }
192 | })
193 | }
194 | }
195 |
--------------------------------------------------------------------------------
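The tests above show the downloader's surface: `NewDownloader` wires a `config.Config` to `config.Options`, `DownloadFile` resolves an element and format to a URL, and `Checksum` optionally verifies the `.md5` sidecar. A compact sketch of the same calls as they might appear inside the module; the element name and paths are illustrative.

```go
package main

import (
	"context"
	"log/slog"

	"github.com/julien-noblet/download-geofabrik/internal/config"
	download "github.com/julien-noblet/download-geofabrik/internal/downloader"
	"github.com/julien-noblet/download-geofabrik/pkg/formats"
)

func main() {
	cfg, err := config.LoadConfig("geofabrik.yml")
	if err != nil {
		slog.Error("cannot load config", "error", err)
		return
	}

	opts := &config.Options{
		Check:           true, // verify the .md5 sidecar when one is defined
		OutputDirectory: "/tmp/",
		FormatFlags:     make(map[string]bool),
	}

	d := download.NewDownloader(cfg, opts)
	if err := d.DownloadFile(context.Background(), "monaco", formats.FormatOsmPbf, "/tmp/monaco.osm.pbf"); err != nil {
		slog.Error("download failed", "error", err)
	}
}
```
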
/internal/scrapper/scrapper.go:
--------------------------------------------------------------------------------
1 | package scrapper
2 |
3 | import (
4 | "errors"
5 | "log/slog"
6 | "net"
7 | "net/http"
8 | "regexp"
9 | "strings"
10 | "sync"
11 | "time"
12 |
13 | "github.com/gocolly/colly/v2"
14 | "github.com/julien-noblet/download-geofabrik/internal/config"
15 | "github.com/julien-noblet/download-geofabrik/internal/element"
16 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
17 | )
18 |
19 | // IScrapper represents a colly Scrapper.
20 | type IScrapper interface {
21 | GetConfig() *config.Config
22 | Collector() *colly.Collector
23 | Limit() *colly.LimitRule
24 | GetPB() int
25 | GetStartURL() string
26 | ParseFormat(id, format string)
27 | }
28 |
29 | // Scrapper defines a default scrapper.
30 | type Scrapper struct {
31 | Config *config.Config
32 | FormatDefinition formats.FormatDefinitions
33 | BaseURL string
34 | DomainGlob string
35 | StartURL string
36 | URLFilters []*regexp.Regexp
37 | AllowedDomains []string
38 | Timeout time.Duration
39 | RandomDelay time.Duration
40 | MaxDepth int
41 | Parallelism int
42 | PB int
43 | mu sync.RWMutex
44 | Async bool
45 | }
46 |
47 | const (
48 | defaultRandomDelay = 5 * time.Second
49 | defaultTimeout = 60 * time.Second
50 | defaultKeepAlive = 30 * time.Second
51 | defaultIdleConnTimeout = 5 * time.Second
52 | defaultTLSHandshakeTimeout = 10 * time.Second
53 | defaultExpectContinueTimeout = 5 * time.Second
54 | minParentListLength = 5
55 | )
56 |
57 | // NewScrapper creates a new Scrapper instance with default values.
58 | func NewScrapper(baseURL, startURL string, allowedDomains []string) *Scrapper {
59 | return &Scrapper{
60 | RandomDelay: defaultRandomDelay,
61 | Timeout: defaultTimeout,
62 | Parallelism: 1,
63 | BaseURL: baseURL,
64 | StartURL: startURL,
65 | AllowedDomains: allowedDomains,
66 | }
67 | }
68 |
69 | // GetConfig initializes a *config.Config from fields.
70 | func (s *Scrapper) GetConfig() *config.Config {
71 | s.mu.RLock()
72 |
73 | if s.Config != nil {
74 | defer s.mu.RUnlock()
75 |
76 | return s.Config
77 | }
78 |
79 | s.mu.RUnlock()
80 |
81 | s.mu.Lock()
82 | defer s.mu.Unlock()
83 |
84 | s.Config = s.initializeConfig()
85 |
86 | return s.Config
87 | }
88 |
89 | // initializeConfig initializes the configuration with default values.
90 | func (s *Scrapper) initializeConfig() *config.Config {
91 | return &config.Config{
92 | Elements: element.MapElement{},
93 | ElementsMutex: &sync.RWMutex{},
94 | Formats: s.FormatDefinition,
95 | BaseURL: s.BaseURL,
96 | }
97 | }
98 |
99 | // Limit defines LimitRules.
100 | func (s *Scrapper) Limit() *colly.LimitRule {
101 | if s.DomainGlob == "" {
102 | s.DomainGlob = "*"
103 | }
104 |
105 | if s.Parallelism <= 1 {
106 | s.Parallelism = 1
107 | }
108 |
109 | return &colly.LimitRule{
110 | DomainGlob: s.DomainGlob,
111 | Parallelism: s.Parallelism,
112 | RandomDelay: s.RandomDelay,
113 | }
114 | }
115 |
116 | // Collector initializes a *colly.Collector.
117 | func (s *Scrapper) Collector(_ ...interface{}) *colly.Collector {
118 | myCollector := colly.NewCollector(
119 | colly.AllowedDomains(s.AllowedDomains...),
120 | colly.URLFilters(s.URLFilters...),
121 | colly.Async(s.Async),
122 | colly.MaxDepth(s.MaxDepth),
123 | )
124 |
125 | if s.Timeout != 0 {
126 | myCollector.SetRequestTimeout(s.Timeout)
127 | }
128 |
129 | myCollector.WithTransport(&http.Transport{
130 | Proxy: http.ProxyFromEnvironment,
131 | DialContext: (&net.Dialer{
132 | Timeout: defaultTimeout,
133 | KeepAlive: defaultKeepAlive,
134 | }).DialContext,
135 | IdleConnTimeout: defaultIdleConnTimeout,
136 | TLSHandshakeTimeout: defaultTLSHandshakeTimeout,
137 | ExpectContinueTimeout: defaultExpectContinueTimeout,
138 | })
139 |
140 | s.Config = s.GetConfig()
141 | if err := myCollector.Limit(s.Limit()); err != nil {
142 | slog.Error("Can't update limit", "error", err)
143 | }
144 |
145 | myCollector.OnError(func(r *colly.Response, err error) {
146 | if !errors.Is(err, colly.ErrForbiddenURL) && !errors.Is(err, colly.ErrForbiddenDomain) && err.Error() != "Forbidden" {
147 | slog.Debug("Request failed", "url", r.Request.URL, "error", err)
148 | } else {
149 | slog.Debug("Forbidden URL", "url", r.Request.URL)
150 | }
151 | })
152 |
153 | return myCollector
154 | }
155 |
156 | // GetStartURL returns StartURL.
157 | func (s *Scrapper) GetStartURL() string {
158 | return s.StartURL
159 | }
160 |
161 | // GetPB returns PB.
162 | func (s *Scrapper) GetPB() int {
163 | return s.PB
164 | }
165 |
166 | // ParseFormat adds Extensions to ID.
167 | func (s *Scrapper) ParseFormat(id, format string) {
168 | s.AddExtension(id, format, &s.Config.Formats)
169 | }
170 |
171 | // ParseFormatService adds Extensions to ID.
172 | func (s *Scrapper) ParseFormatService(id, format string, def *formats.FormatDefinitions) {
173 | s.AddExtension(id, format, def)
174 | }
175 |
176 | // AddExtension adds an extension to the configuration.
177 | func (s *Scrapper) AddExtension(id, format string, def *formats.FormatDefinitions) {
178 | for f, i := range *def {
179 | if format == i.ID {
180 | s.Config.AddExtension(id, f)
181 | }
182 | }
183 | }
184 |
185 | // FileExt returns filename and extension.
186 | func FileExt(url string) (filename, extension string) {
187 | urls := strings.Split(url, "/")
188 | f := strings.Split(urls[len(urls)-1], ".")
189 |
190 | return f[0], strings.Join(f[1:], ".")
191 | }
192 |
193 | // GetParent returns filename and path.
194 | func GetParent(url string) (filename, path string) {
195 | r := strings.Split(url, "/")
196 | if len(r) < minParentListLength {
197 | return "", strings.Join(r[:len(r)-1], "/")
198 | }
199 |
200 | return r[len(r)-2], strings.Join(r[:len(r)-1], "/")
201 | }
202 |
--------------------------------------------------------------------------------
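In the scrapper package above, `FileExt` splits the last path segment on its first dot and `GetParent` returns the second-to-last segment for URLs with at least `minParentListLength` parts. A tiny sketch on a Geofabrik-style URL; the expected output is derived from the code above, and it only compiles inside the module since the package is internal.

```go
package main

import (
	"fmt"

	"github.com/julien-noblet/download-geofabrik/internal/scrapper"
)

func main() {
	name, ext := scrapper.FileExt("https://download.geofabrik.de/europe/monaco-latest.osm.pbf")
	fmt.Println(name, ext) // monaco-latest osm.pbf

	parent, path := scrapper.GetParent("https://download.geofabrik.de/europe/monaco.html")
	fmt.Println(parent, path) // europe https://download.geofabrik.de/europe
}
```
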
/internal/element/element_test.go:
--------------------------------------------------------------------------------
1 | package element_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/julien-noblet/download-geofabrik/internal/config"
7 | "github.com/julien-noblet/download-geofabrik/internal/element"
8 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
9 | )
10 |
11 | const (
12 | geofabrikYml = "../geofabrik.yml"
13 | )
14 |
15 | func sampleAfricaElementPtr() *element.Element {
16 | return &element.Element{
17 | ID: "africa",
18 | Name: "Africa",
19 | Formats: []string{
20 | formats.FormatOsmPbf,
21 | "osm.pbf.md5",
22 | formats.FormatOsmBz2,
23 | "osm.bz2.md5",
24 | formats.FormatOshPbf,
25 | "osh.pbf.md5",
26 | formats.FormatPoly,
27 | formats.FormatKml,
28 | formats.FormatState,
29 | },
30 | }
31 | }
32 |
33 | func sampleGeorgiaUsElementPtr() *element.Element {
34 | return &element.Element{
35 | ID: "georgia-us",
36 | File: "georgia",
37 | Name: "Georgia (US State)",
38 | Formats: []string{
39 | formats.FormatOsmPbf,
40 | "osm.pbf.md5",
41 | formats.FormatShpZip,
42 | formats.FormatOsmBz2,
43 | "osm.bz2.md5",
44 | formats.FormatOshPbf,
45 | "osh.pbf.md5",
46 | formats.FormatPoly,
47 | formats.FormatKml,
48 | formats.FormatState,
49 | },
50 | Parent: "us",
51 | }
52 | }
53 |
54 | func sampleUsElementPtr() *element.Element {
55 | return &element.Element{
56 | ID: "us",
57 | Meta: true,
58 | Name: "United States of America",
59 | Parent: "north-america",
60 | }
61 | }
62 |
63 | func sampleNorthAmericaElementPtr() *element.Element {
64 | return &element.Element{
65 | ID: "north-america",
66 | Name: "North America",
67 | Formats: []string{
68 | formats.FormatOsmPbf,
69 | "osm.pbf.md5",
70 | formats.FormatOsmBz2,
71 | "osm.bz2.md5",
72 | formats.FormatOshPbf,
73 | "osh.pbf.md5",
74 | formats.FormatPoly,
75 | formats.FormatKml,
76 | formats.FormatState,
77 | },
78 | }
79 | }
80 |
81 | func sampleElementValidPtr() map[string]element.Element {
82 | return map[string]element.Element{
83 | "africa": *sampleAfricaElementPtr(),
84 | "georgia-us": *sampleGeorgiaUsElementPtr(),
85 | "us": *sampleUsElementPtr(),
86 | "north-america": *sampleNorthAmericaElementPtr(),
87 | }
88 | }
89 |
90 | func Benchmark_HasParent_parse_geofabrik_yml(b *testing.B) {
91 | c, _ := config.LoadConfig(geofabrikYml)
92 |
93 | for range make([]struct{}, b.N) {
94 | for _, v := range c.Elements {
95 | v.HasParent()
96 | }
97 | }
98 | }
99 |
100 | func TestElement_HasParent(t *testing.T) {
101 | t.Parallel()
102 |
103 | tests := []struct {
104 | name string
105 | fields element.Element
106 | want bool
107 | }{
108 | // TODO: Add test cases.
109 | {
110 | 			name:   "us has a parent",
111 | fields: sampleElementValidPtr()["us"],
112 | want: true,
113 | },
114 | {
115 | 			name:   "africa has no parent",
116 | 			fields: sampleElementValidPtr()["africa"],
117 | want: false,
118 | },
119 | {
120 | 			name:   "empty element has no parent",
121 | fields: element.Element{ID: "", File: "", Meta: true, Name: "", Formats: []string(nil), Parent: ""},
122 | want: false,
123 | },
124 | }
125 | for _, thisTest := range tests {
126 | t.Run(thisTest.name, func(t *testing.T) {
127 | t.Parallel()
128 |
129 | myElement := &element.Element{
130 | ID: thisTest.fields.ID,
131 | File: thisTest.fields.File,
132 | Meta: thisTest.fields.Meta,
133 | Name: thisTest.fields.Name,
134 | Formats: thisTest.fields.Formats,
135 | Parent: thisTest.fields.Parent,
136 | }
137 | if got := myElement.HasParent(); got != thisTest.want {
138 | t.Errorf("Element.HasParent() = %v, want %v", got, thisTest.want)
139 | }
140 | })
141 | }
142 | }
143 |
144 | func Benchmark_contains_parse_geofabrik_yml(b *testing.B) {
145 | myConfig, _ := config.LoadConfig(geofabrikYml)
146 | sliceE := element.Formats{}
147 |
148 | for key := range myConfig.Elements {
149 | sliceE = append(sliceE, key)
150 | }
151 |
152 | for range make([]struct{}, b.N) {
153 | for k := range myConfig.Elements {
154 | sliceE.Contains(k)
155 | }
156 | }
157 | }
158 |
159 | func Benchmark_contain_parse_geofabrik_yml_France_formats_osm_pbf(b *testing.B) {
160 | c, _ := config.LoadConfig(geofabrikYml)
161 | myformats := c.Elements["france"].Formats
162 | format := formats.FormatOsmPbf
163 |
164 | for range make([]struct{}, b.N) {
165 | myformats.Contains(format)
166 | }
167 | }
168 |
169 | func Test_MakeParent(t *testing.T) {
170 | t.Parallel()
171 |
172 | type args struct {
173 | gparent string
174 | e element.Element
175 | }
176 |
177 | tests := []struct {
178 | want *element.Element
179 | name string
180 | args args
181 | }{
182 | {
183 | name: "No Parents",
184 | args: args{e: element.Element{ID: "a", Name: "a", Parent: ""}, gparent: ""},
185 | want: nil,
186 | },
187 | {
188 | name: "Have Parent with no gparent",
189 | args: args{e: element.Element{ID: "a", Name: "a", Parent: "p"}, gparent: ""},
190 | want: &element.Element{ID: "p", Name: "p", Meta: true},
191 | },
192 | {
193 | name: "Have Parent with gparent",
194 | args: args{e: element.Element{ID: "a", Name: "a", Parent: "p"}, gparent: "gp"},
195 | want: &element.Element{ID: "p", Name: "p", Meta: true, Parent: "gp"},
196 | },
197 | }
198 |
199 | for _, thisTest := range tests {
200 | t.Run(thisTest.name, func(t *testing.T) {
201 | t.Parallel()
202 |
203 | got := element.CreateParentElement(&thisTest.args.e, thisTest.args.gparent)
204 | if got == nil && thisTest.want == nil {
205 | return
206 | }
207 |
208 | if got.ID != thisTest.want.ID ||
209 | got.Name != thisTest.want.Name ||
210 | got.Meta != thisTest.want.Meta ||
211 | got.Parent != thisTest.want.Parent {
212 | 				t.Errorf("element.CreateParentElement() = %+v, gparent = %+v, want %+v", got, thisTest.args.gparent, thisTest.want)
213 | }
214 | })
215 | }
216 | }
217 |
--------------------------------------------------------------------------------
/internal/scrapper/geofabrik/geofabrik.go:
--------------------------------------------------------------------------------
1 | package geofabrik
2 |
3 | import (
4 | "errors"
5 | "log/slog"
6 | "regexp"
7 |
8 | "github.com/gocolly/colly/v2"
9 | "github.com/julien-noblet/download-geofabrik/internal/element"
10 | "github.com/julien-noblet/download-geofabrik/internal/scrapper"
11 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
12 | )
13 |
14 | // Constants for magic numbers and URLs.
15 | const (
16 | progressBarCount = 509 // number of elements
17 | parallelism = 20 // number of parallel downloads
18 | baseURL = "https://download.geofabrik.de"
19 | startURL = baseURL + "/"
20 | )
21 |
22 | // Geofabrik Scrapper.
23 | type Geofabrik struct {
24 | *scrapper.Scrapper
25 | }
26 |
27 | // GetDefault returns the default configuration for Geofabrik scrapper.
28 | func GetDefault() *Geofabrik {
29 | urlFilters := []*regexp.Regexp{
30 | regexp.MustCompile(`https://download\.geofabrik\.de/.+\.html$`),
31 | regexp.MustCompile(`https://download\.geofabrik\.de/$`),
32 | }
33 |
34 | formatDefinition := formats.FormatDefinitions{
35 | "osm.bz2.md5": {ID: "osm.bz2.md5", Loc: "-latest.osm.bz2.md5"},
36 | "osm.pbf.md5": {ID: "osm.pbf.md5", Loc: "-latest.osm.pbf.md5"},
37 | formats.FormatKml: {ID: formats.FormatKml, Loc: ".kml"},
38 | formats.FormatMBTiles: {ID: formats.FormatMBTiles, Loc: "-latest-free.mbtiles.zip", ToLoc: "latest-free.mbtiles.zip"},
39 | formats.FormatOsmBz2: {ID: formats.FormatOsmBz2, Loc: "-latest.osm.bz2"},
40 | formats.FormatOsmPbf: {ID: formats.FormatOsmPbf, Loc: "-latest.osm.pbf"},
41 | formats.FormatPoly: {ID: formats.FormatPoly, Loc: ".poly"},
42 | formats.FormatShpZip: {ID: formats.FormatShpZip, Loc: "-shortbread-1.0.mbtiles"},
43 | formats.FormatState: {ID: formats.FormatState, Loc: "-updates/state.txt"},
44 | }
45 |
46 | return &Geofabrik{
47 | Scrapper: &scrapper.Scrapper{
48 | PB: progressBarCount,
49 | Async: true,
50 | Parallelism: parallelism,
51 | MaxDepth: 0,
52 | AllowedDomains: []string{`download.geofabrik.de`},
53 | BaseURL: baseURL,
54 | StartURL: startURL,
55 | URLFilters: urlFilters,
56 | FormatDefinition: formatDefinition,
57 | },
58 | }
59 | }
60 |
61 | // Collector represents Geofabrik's scrapper.
62 | func (g *Geofabrik) Collector() *colly.Collector {
63 | myCollector := g.Scrapper.Collector()
64 | myCollector.OnHTML("#subregions", func(e *colly.HTMLElement) {
65 | g.ParseSubregion(e, myCollector)
66 | })
67 | myCollector.OnHTML("#specialsubregions", func(e *colly.HTMLElement) {
68 | g.ParseSubregion(e, myCollector)
69 | })
70 | myCollector.OnHTML("li", func(e *colly.HTMLElement) {
71 | g.ParseLi(e, myCollector)
72 | })
73 |
74 | return myCollector
75 | }
76 |
77 | // ParseSubregion parses the subregion information from the HTML.
78 | func (g *Geofabrik) ParseSubregion(e *colly.HTMLElement, myCollector *colly.Collector) {
79 | e.ForEach("td.subregion", func(_ int, el *colly.HTMLElement) {
80 | el.ForEach("a", func(_ int, sub *colly.HTMLElement) {
81 | href := sub.Request.AbsoluteURL(sub.Attr("href"))
82 | myID, extension := scrapper.FileExt(href)
83 |
84 | if extension == "html" {
85 | g.handleHTMLExtension(sub, href, myID, myCollector)
86 | }
87 | })
88 | })
89 | }
90 |
91 | // handleHTMLExtension handles the HTML extension case.
92 | func (g *Geofabrik) handleHTMLExtension(sub *colly.HTMLElement, href, myID string, myCollector *colly.Collector) {
93 | parent, parentPath := scrapper.GetParent(href)
94 | myID, file := g.handleSpecialCases(myID, parent)
95 |
96 | myElement := element.Element{
97 | ID: myID,
98 | Name: sub.Text,
99 | Parent: parent,
100 | Meta: true,
101 | File: file,
102 | }
103 |
104 | if !g.Config.Exist(parent) && parent != "" {
105 | gparent, _ := scrapper.GetParent(parentPath)
106 | slog.Debug("Create Meta", "parent", myElement.Parent, "gparent", gparent, "path", parentPath)
107 |
108 | if gp := element.CreateParentElement(&myElement, gparent); gp != nil {
109 | if err := g.Config.MergeElement(gp); err != nil {
110 | slog.Error("Can't merge", "name", myElement.Name, "error", err)
111 | }
112 | }
113 | }
114 |
115 | if err := g.Config.MergeElement(&myElement); err != nil {
116 | slog.Error("Can't merge", "name", myElement.Name, "error", err)
117 | }
118 |
119 | slog.Debug("Add", "href", href)
120 |
121 | if err := myCollector.Visit(href); err != nil && !errors.Is(err, &colly.AlreadyVisitedError{}) {
122 | slog.Error("Can't get url", "error", err)
123 | }
124 | }
125 |
126 | // handleSpecialCases handles special cases for certain IDs.
127 | func (g *Geofabrik) handleSpecialCases(myID, parent string) (newID, file string) {
128 | const georgia = "georgia"
129 |
130 | switch myID {
131 | case georgia:
132 | switch parent {
133 | case "us":
134 | myID = georgia + "-us"
135 | file = georgia
136 |
137 | case "europe":
138 | myID = georgia + "-eu"
139 | file = georgia
140 | }
141 |
142 | case "guatemala":
143 | if parent == "south-america" {
144 | myID = "guatemala-south-america"
145 | file = "guatemala"
146 | }
147 | }
148 |
149 | return myID, file
150 | }
151 |
152 | // ParseFormat adds extensions to the ID.
153 | func (g *Geofabrik) ParseFormat(id, format string) {
154 | g.Scrapper.ParseFormat(id, format)
155 |
156 | if format == formats.FormatOsmPbf {
157 | g.Config.AddExtension(id, formats.FormatKml)
158 | g.Config.AddExtension(id, formats.FormatState)
159 | }
160 | }
161 |
162 | // ParseLi parses the list items from the HTML.
163 | func (g *Geofabrik) ParseLi(e *colly.HTMLElement, _ *colly.Collector) {
164 | e.ForEach("a", func(_ int, element *colly.HTMLElement) {
165 | _, format := scrapper.FileExt(element.Attr("href"))
166 |
167 | myID, _ := scrapper.FileExt(element.Request.URL.String())
168 | grandParent, _ := scrapper.GetParent(element.Request.AbsoluteURL(element.Attr("href")))
169 | myID, _ = g.handleSpecialCases(myID, grandParent)
170 |
171 | g.ParseFormat(myID, format)
172 | })
173 | }
174 |
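The `handleSpecialCases` logic above exists because a few region names (georgia, guatemala) appear under more than one parent on Geofabrik, so the element ID gets a parent-specific suffix while the downloadable file keeps the original name. Below is a minimal standalone sketch of that idea; the `ambiguous` map and `disambiguate` helper are illustrative and not part of this package's API.

```go
package main

import "fmt"

// ambiguous maps a region name to the parents under which it collides,
// and to the suffixed ID used in that case (the cases mirror the ones
// handled by handleSpecialCases above).
var ambiguous = map[string]map[string]string{
	"georgia":   {"us": "georgia-us", "europe": "georgia-eu"},
	"guatemala": {"south-america": "guatemala-south-america"},
}

// disambiguate returns a unique element ID plus the file name to download,
// which keeps the original region name when the ID had to be suffixed.
func disambiguate(id, parent string) (newID, file string) {
	if suffixed, ok := ambiguous[id][parent]; ok {
		return suffixed, id
	}

	return id, ""
}

func main() {
	fmt.Println(disambiguate("georgia", "us"))     // georgia-us georgia
	fmt.Println(disambiguate("georgia", "africa")) // georgia (unchanged)
	fmt.Println(disambiguate("france", "europe"))  // france (unchanged)
}
```

Keeping the rename table in one place makes it easy to extend if another colliding region name appears.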
--------------------------------------------------------------------------------
/internal/scrapper/geo2day/geo2day.go:
--------------------------------------------------------------------------------
1 | package geo2day
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "log/slog"
7 | "regexp"
8 | "sync"
9 |
10 | "github.com/gocolly/colly/v2"
11 | "github.com/julien-noblet/download-geofabrik/internal/config"
12 | "github.com/julien-noblet/download-geofabrik/internal/element"
13 | "github.com/julien-noblet/download-geofabrik/internal/scrapper"
14 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
15 | )
16 |
17 | // Constants for magic numbers and URLs.
18 | const (
19 | progressBarCount = 1003 // number of elements
20 | parallelism = 20 // number of parallel downloads
21 | baseURL = "https://geo2day.com"
22 | startURL = baseURL + "/"
23 | )
24 |
25 | // Geo2day Scrapper.
26 | type Geo2day struct {
27 | *scrapper.Scrapper
28 | }
29 |
30 | // GetDefault returns the default configuration for the Geo2day scrapper.
31 | func GetDefault() *Geo2day {
32 | urlFilters := []*regexp.Regexp{
33 | regexp.MustCompile(`https://geo2day\.com/.+\.html$`),
34 | regexp.MustCompile(`https://geo2day\.com/$`),
35 | }
36 |
37 | formatDefinition := formats.FormatDefinitions{
38 | "osm.pbf.md5": {ID: "osm.pbf.md5", Loc: ".md5"},
39 | formats.FormatGeoJSON: {ID: formats.FormatGeoJSON, Loc: ".geojson"},
40 | formats.FormatOsmPbf: {ID: formats.FormatOsmPbf, Loc: ".pbf"},
41 | formats.FormatPoly: {ID: formats.FormatPoly, Loc: ".poly"},
42 | }
43 |
44 | return &Geo2day{
45 | Scrapper: &scrapper.Scrapper{
46 | PB: progressBarCount,
47 | Async: true,
48 | Parallelism: parallelism,
49 | MaxDepth: 0,
50 | AllowedDomains: []string{`geo2day.com`},
51 | BaseURL: baseURL,
52 | StartURL: startURL,
53 | URLFilters: urlFilters,
54 | FormatDefinition: formatDefinition,
55 | Config: &config.Config{
56 | Formats: formats.FormatDefinitions{},
57 | Elements: element.MapElement{},
58 | ElementsMutex: &sync.RWMutex{},
59 | BaseURL: "",
60 | },
61 | },
62 | }
63 | }
64 |
65 | // Collector returns the colly.Collector configured for geo2day.com.
66 | func (g *Geo2day) Collector() *colly.Collector {
67 | myCollector := g.Scrapper.Collector()
68 | myCollector.OnHTML(".row", func(e *colly.HTMLElement) {
69 | g.ParseLi(e, myCollector)
70 | })
71 | myCollector.OnHTML("table", func(e *colly.HTMLElement) {
72 | g.ParseSubregion(e, myCollector)
73 | })
74 |
75 | return myCollector
76 | }
77 |
78 | // Exceptions handles special cases for certain IDs.
79 | func (g *Geo2day) Exceptions(myElement *element.Element) *element.Element {
80 | exceptions := []struct {
81 | ID string
82 | Parent string
83 | }{
84 | {"la_rioja", "argentina"},
85 | {"la_rioja", "spain"},
86 | {"guyane", "france"},
87 | {"guyane", "south-america"},
88 | {"sevastopol", "ukraine"},
89 | {"sevastopol", "russia"},
90 | {"limburg", "netherlands"},
91 | {"limburg", "flanders"},
92 | {"cordoba", "argentina"},
93 | {"cordoba", "andalucia"},
94 | {"georgia", "asia"},
95 | {"georgia", "us"},
96 | }
97 |
98 | for _, exception := range exceptions {
99 | if myElement.ID == exception.ID && myElement.Parent == exception.Parent {
100 | myElement.ID = fmt.Sprintf("%s-%s", myElement.ID, myElement.Parent)
101 | }
102 | }
103 |
104 | return myElement
105 | }
106 |
107 | // ParseSubregion parses the subregion information from the HTML.
108 | func (g *Geo2day) ParseSubregion(e *colly.HTMLElement, myCollector *colly.Collector) {
109 | e.ForEach("td", func(_ int, el *colly.HTMLElement) {
110 | el.ForEach("a", func(_ int, sub *colly.HTMLElement) {
111 | href := sub.Request.AbsoluteURL(sub.Attr("href"))
112 |
113 | myID, extension := scrapper.FileExt(href)
114 | if myID == "" {
115 | slog.Debug("myID is empty", "href", href)
116 |
117 | return
118 | }
119 |
120 | if extension == "html" {
121 | g.handleHTMLExtension(sub, href, myID, myCollector)
122 | } else {
123 | parent, _ := scrapper.GetParent(href)
124 |
125 | myElement := element.Element{
126 | ID: myID,
127 | Name: sub.Text,
128 | Parent: parent,
129 | Meta: true,
130 | }
131 | myElement = *g.Exceptions(&myElement)
132 |
133 | if err := g.Config.MergeElement(&myElement); err != nil {
134 | slog.Error("Can't merge", "name", myElement.Name, "error", err)
135 | }
136 |
137 | g.ParseFormat(myElement.ID, extension)
138 | }
139 | })
140 | })
141 | }
142 |
143 | // handleHTMLExtension handles the HTML extension case.
144 | func (g *Geo2day) handleHTMLExtension(sub *colly.HTMLElement, href, myID string, myCollector *colly.Collector) {
145 | parent, parentPath := scrapper.GetParent(href)
146 |
147 | myElement := element.Element{
148 | ID: myID,
149 | Name: sub.Text,
150 | Parent: parent,
151 | Meta: true,
152 | }
153 |
154 | myElement = *g.Exceptions(&myElement)
155 |
156 | if !g.Config.Exist(parent) && parent != "" {
157 | gparent, _ := scrapper.GetParent(parentPath)
158 | slog.Debug("Create Meta", "parent", myElement.Parent, "gparent", gparent, "path", parentPath)
159 |
160 | if gp := element.CreateParentElement(&myElement, gparent); gp != nil {
161 | if err := g.Config.MergeElement(gp); err != nil {
162 | slog.Error("Can't merge", "name", myElement.Name, "error", err)
163 | }
164 | }
165 | }
166 |
167 | if err := g.Config.MergeElement(&myElement); err != nil {
168 | slog.Error("Can't merge", "name", myElement.Name, "error", err)
169 | }
170 |
171 | slog.Debug("Add", "href", href)
172 |
173 | if err := myCollector.Visit(href); err != nil && !errors.Is(err, &colly.AlreadyVisitedError{}) {
174 | slog.Error("Can't get url", "error", err)
175 | }
176 | }
177 |
178 | // ParseFormat adds extensions to the ID.
179 | func (g *Geo2day) ParseFormat(id, format string) {
180 | g.ParseFormatService(id, format, &g.FormatDefinition)
181 |
182 | if format == formats.FormatOsmPbf {
183 | g.Config.AddExtension(id, "osm.pbf.md5")
184 | }
185 | }
186 |
187 | // ParseLi parses the list items from the HTML.
188 | func (g *Geo2day) ParseLi(e *colly.HTMLElement, _ *colly.Collector) {
189 | e.ForEach("a", func(_ int, element *colly.HTMLElement) {
190 | _, format := scrapper.FileExt(element.Attr("href"))
191 | myID, _ := scrapper.FileExt(element.Request.URL.String())
192 |
193 | g.ParseFormat(myID, format)
194 | })
195 | }
196 |
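Both the Geofabrik and Geo2day scrapers follow the same colly pattern: a shared `scrapper.Scrapper` builds the base collector, and the site-specific type registers `OnHTML` callbacks for its selectors. A self-contained sketch of that crawling pattern with gocolly follows; the domain, URL, and selectors are placeholders, not geo2day.com's real markup.

```go
package main

import (
	"fmt"

	"github.com/gocolly/colly/v2"
)

func main() {
	// Restrict the crawl to a single domain, mirroring AllowedDomains above.
	c := colly.NewCollector(
		colly.AllowedDomains("example.com"),
		colly.MaxDepth(2),
	)

	// Follow sub-region links found in table cells.
	c.OnHTML("td a[href]", func(e *colly.HTMLElement) {
		link := e.Request.AbsoluteURL(e.Attr("href"))
		fmt.Println("queueing", link)

		// An "already visited" error is expected on repeat visits and can be ignored here.
		_ = c.Visit(link)
	})

	// Record downloadable formats advertised on each page.
	c.OnHTML("li a[href]", func(e *colly.HTMLElement) {
		fmt.Println("format link:", e.Attr("href"))
	})

	if err := c.Visit("https://example.com/"); err != nil {
		fmt.Println("visit failed:", err)
	}
}
```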
--------------------------------------------------------------------------------
/internal/downloader/download.go:
--------------------------------------------------------------------------------
1 | package download
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "io"
8 | "log/slog"
9 | "net"
10 | "net/http"
11 | "os"
12 | "time"
13 |
14 | pb "github.com/cheggaaa/pb/v3"
15 | "github.com/julien-noblet/download-geofabrik/internal/config"
16 | )
17 |
18 | const (
19 | progressMinimal = 512 * 1024 // Don't display progress bar if size < 512 KiB
20 | defaultTimeout = 60 * time.Second
21 | keepAlive = 30 * time.Second
22 | idleTimeout = 5 * time.Second
23 | tlsTimeout = 10 * time.Second
24 | continueTimeout = 5 * time.Second
25 | fileMode = 0o644
26 | )
27 |
28 | var (
29 | ErrFromURL = errors.New("can't download element")
30 | ErrServerStatusCode = errors.New("server return code error")
31 | )
32 |
33 | // Downloader handles downloading files.
34 | type Downloader struct {
35 | Config *config.Config
36 | Options *config.Options
37 | }
38 |
39 | // NewDownloader creates a new Downloader.
40 | func NewDownloader(cfg *config.Config, opts *config.Options) *Downloader {
41 | return &Downloader{
42 | Config: cfg,
43 | Options: opts,
44 | }
45 | }
46 |
47 | // createClient creates a configured HTTP client.
48 | func createClient() *http.Client {
49 | return &http.Client{
50 | Transport: &http.Transport{
51 | Proxy: http.ProxyFromEnvironment,
52 | DialContext: (&net.Dialer{
53 | Timeout: defaultTimeout,
54 | KeepAlive: keepAlive,
55 | DualStack: true,
56 | }).DialContext,
57 | MaxIdleConns: 0,
58 | IdleConnTimeout: idleTimeout,
59 | TLSHandshakeTimeout: tlsTimeout,
60 | ExpectContinueTimeout: continueTimeout,
61 | },
62 | }
63 | }
64 |
65 | // FromURL downloads a file from a URL to a specified file path.
66 | func (d *Downloader) FromURL(ctx context.Context, myURL, fileName string) (err error) {
67 | slog.Debug("Downloading", "url", myURL, "file", fileName)
68 |
69 | if d.Options.NoDownload {
70 | return nil
71 | }
72 |
73 | client := createClient()
74 |
75 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, myURL, http.NoBody)
76 | if err != nil {
77 | return fmt.Errorf("error creating request for %s - %w", myURL, err)
78 | }
79 |
80 | response, err := client.Do(req)
81 | if err != nil {
82 | return fmt.Errorf("error while downloading %s - %w", myURL, err)
83 | }
84 |
85 | defer func() {
86 | if cerr := response.Body.Close(); cerr != nil && err == nil {
87 | err = fmt.Errorf("error while closing response body for %s - %w", myURL, cerr)
88 | }
89 | }()
90 |
91 | if response.StatusCode != http.StatusOK {
92 | return fmt.Errorf("%w: error while downloading %v, server return code %d",
93 | ErrServerStatusCode, myURL, response.StatusCode)
94 | }
95 |
96 | return d.saveToFile(fileName, response)
97 | }
98 |
99 | // saveToFile saves the response body to a file with progress bar support.
100 | func (d *Downloader) saveToFile(fileName string, response *http.Response) (err error) {
101 | file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, fileMode)
102 | if err != nil {
103 | return fmt.Errorf("error while creating %s - %w", fileName, err)
104 | }
105 |
106 | defer func() {
107 | if cerr := file.Close(); cerr != nil && err == nil {
108 | err = fmt.Errorf("error while closing %s - %w", fileName, cerr)
109 | }
110 | }()
111 |
112 | var (
113 | output io.Writer = file
114 | currentProgress int64
115 | )
116 |
117 | // Display progress bar if requested, not quiet, and file is large enough
118 | if d.Options.Progress && !d.Options.Quiet && response.ContentLength > progressMinimal {
119 | progressBar := pb.Full.Start64(response.ContentLength)
120 | barReader := progressBar.NewProxyReader(response.Body)
121 |
122 | currentProgress, err = io.Copy(output, barReader)
123 | if err != nil {
124 | return fmt.Errorf("error while writing %s - %w", fileName, err)
125 | }
126 |
127 | progressBar.Finish()
128 | } else {
129 | currentProgress, err = io.Copy(output, response.Body)
130 | if err != nil {
131 | return fmt.Errorf("error while writing %s - %w", fileName, err)
132 | }
133 | }
134 |
135 | slog.Info("Downloaded", "file", fileName)
136 | slog.Debug("Bytes downloaded", "bytes", currentProgress)
137 |
138 | return nil
139 | }
140 |
141 | // FileExist checks if a file exists at the given path.
142 | func FileExist(filePath string) bool {
143 | _, err := os.Stat(filePath)
144 |
145 | return err == nil
146 | }
147 |
148 | // DownloadFile downloads a file based on the configuration and element.
149 | func (d *Downloader) DownloadFile(ctx context.Context, elementID, formatName, outputPath string) error {
150 | // Resolve the format ID from the config, then locate the element and
151 | // build its download URL before fetching it.
152 | format := d.Config.Formats[formatName].ID
153 |
154 | myElem, err := config.FindElem(d.Config, elementID)
155 | if err != nil {
156 | slog.Error("Element not found", "element", elementID, "error", err)
157 |
158 | return fmt.Errorf("%w: %s", config.ErrFindElem, elementID)
159 | }
160 |
161 | myURL, err := config.Elem2URL(d.Config, myElem, format)
162 | if err != nil {
163 | slog.Error("URL generation failed", "error", err)
164 |
165 | return fmt.Errorf("%w: %w", config.ErrElem2URL, err)
166 | }
167 |
168 | err = d.FromURL(ctx, myURL, outputPath)
169 | if err != nil {
170 | slog.Error("Download failed", "error", err)
171 |
172 | return fmt.Errorf("%w: %w", ErrFromURL, err)
173 | }
174 |
175 | return nil
176 | }
177 |
178 | // Checksum downloads and verifies the checksum of a file.
179 | func (d *Downloader) Checksum(ctx context.Context, elementID, formatName string) bool {
180 | if !d.Options.Check {
181 | return false
182 | }
183 |
184 | hashType := "md5"
185 | fhash := formatName + "." + hashType
186 |
187 | if ok, _, _ := config.IsHashable(d.Config, formatName); ok {
188 | myElem, err := config.FindElem(d.Config, elementID)
189 | if err != nil {
190 | slog.Error("Element not found", "element", elementID, "error", err)
191 |
192 | return false
193 | }
194 |
195 | myURL, err := config.Elem2URL(d.Config, myElem, fhash)
196 | if err != nil {
197 | slog.Error("URL generation failed", "error", err)
198 |
199 | return false
200 | }
201 |
202 | outputPath := d.Options.OutputDirectory + elementID
203 |
204 | if e := d.FromURL(ctx, myURL, outputPath+"."+fhash); e != nil {
205 | slog.Error("Checksum download failed", "error", e)
206 |
207 | return false
208 | }
209 |
210 | return VerifyFileChecksum(outputPath+"."+d.Config.Formats[formatName].ID, outputPath+"."+fhash)
211 | }
212 |
213 | slog.Warn("No checksum provided", "file", d.Options.OutputDirectory+elementID+"."+formatName)
214 |
215 | return false
216 | }
217 |
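`saveToFile` streams the response body straight to disk and only wraps it in a progress-bar proxy reader when the download is large enough to be worth showing. Below is a reduced standalone sketch of that streaming pattern using `cheggaaa/pb/v3`; the URL and file name are placeholders, and the custom transport and timeouts from `createClient` are omitted.

```go
package main

import (
	"io"
	"log"
	"net/http"
	"os"

	pb "github.com/cheggaaa/pb/v3"
)

func main() {
	resp, err := http.Get("https://example.com/some-extract.osm.pbf") // placeholder URL
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	out, err := os.OpenFile("extract.osm.pbf", os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	var src io.Reader = resp.Body

	// Wrap the body in a proxy reader so the bar advances as bytes are copied,
	// but only when the server reports a usefully large Content-Length.
	if resp.ContentLength > 512*1024 {
		bar := pb.Full.Start64(resp.ContentLength)
		defer bar.Finish()
		src = bar.NewProxyReader(resp.Body)
	}

	if _, err := io.Copy(out, src); err != nil {
		log.Fatal(err)
	}
}
```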
--------------------------------------------------------------------------------
/internal/cli/download.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "log/slog"
7 | "os"
8 |
9 | config "github.com/julien-noblet/download-geofabrik/internal/config"
10 | downloader "github.com/julien-noblet/download-geofabrik/internal/downloader"
11 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
12 | "github.com/spf13/cobra"
13 | "github.com/spf13/viper"
14 | )
15 |
16 | var (
17 | // Flags for download command.
18 | outputDir string
19 | check bool
20 | noDownload bool
21 | downloadProgress bool
22 | // Format flags.
23 | formatFlags = make(map[string]*bool)
24 | )
25 |
26 | var downloadCmd = &cobra.Command{
27 | Use: "download [element]",
28 | Short: "Download element",
29 | Args: cobra.ExactArgs(1),
30 | RunE: runDownload,
31 | }
32 |
33 | func RegisterDownloadCmd() {
34 | rootCmd.AddCommand(downloadCmd)
35 |
36 | downloadCmd.Flags().StringVarP(&outputDir, "output-dir", "d", "", "Set output directory")
37 | downloadCmd.Flags().BoolVar(&check, "check", true, "Verify downloads with a checksum (default). Use --check=false to skip verification")
38 | downloadCmd.Flags().BoolVarP(&noDownload, "nodownload", "n", false, "Do not download file (test only)")
39 | downloadCmd.Flags().BoolVar(&downloadProgress, "progress", true, "Show progress bar")
40 |
41 | // Add format flags
42 | // These mimic the original kingpin flags
43 | addFormatFlag(formats.KeyOsmPbf, "P", "Download osm.pbf (default)")
44 | addFormatFlag(formats.KeyOshPbf, "H", "Download osh.pbf")
45 | addFormatFlag(formats.KeyOsmGz, "G", "Download osm.gz")
46 | addFormatFlag(formats.KeyOsmBz2, "B", "Download osm.bz2")
47 | addFormatFlag(formats.KeyShpZip, "S", "Download shp.zip")
48 | addFormatFlag(formats.KeyState, "", "Download state.txt")
49 | addFormatFlag(formats.KeyPoly, "p", "Download poly")
50 | addFormatFlag(formats.KeyKml, "k", "Download kml")
51 | addFormatFlag(formats.KeyGeoJSON, "g", "Download GeoJSON")
52 | addFormatFlag(formats.KeyGarminOSM, "O", "Download Garmin OSM")
53 |
54 | // Additional, less common formats.
55 | addFormatFlag(formats.KeyMapsforge, "m", "Download Mapsforge")
56 | addFormatFlag(formats.KeyMBTiles, "M", "Download MBTiles")
57 | addFormatFlag(formats.KeyCSV, "C", "Download CSV")
58 | addFormatFlag(formats.KeyGarminOnroad, "r", "Download Garmin Onroad")
59 | addFormatFlag(formats.KeyGarminOntrail, "t", "Download Garmin Ontrail")
60 | addFormatFlag(formats.KeyGarminOpenTopo, "o", "Download Garmin OpenTopo")
61 | }
62 |
63 | func addFormatFlag(key, shorthand, usage string) {
64 | val := false
65 | formatFlags[key] = &val
66 | downloadCmd.Flags().BoolVarP(&val, key, shorthand, false, usage)
67 | }
68 |
69 | func runDownload(_ *cobra.Command, args []string) error {
70 | elementID := args[0]
71 |
72 | // Prepare Options.
73 | // Persistent flags (config file, verbose, quiet) are parsed by rootCmd
74 | // and bound to Viper in root.go; they are read back through viper here.
75 |
76 | cfgFile := viper.ConfigFileUsed()
77 | if cfgFile == "" {
78 | if service != "" {
79 | cfgFile = service + ".yml"
80 | } else {
81 | cfgFile = config.DefaultConfigFile
82 | }
83 | }
84 |
85 | opts := &config.Options{
86 | ConfigFile: cfgFile,
87 | OutputDirectory: outputDir,
88 | Check: check,
89 | Verbose: viper.GetBool("verbose"),
90 | Quiet: viper.GetBool("quiet"),
91 | NoDownload: noDownload,
92 | Progress: downloadProgress,
93 | FormatFlags: make(map[string]bool),
94 | }
95 |
96 | // Fill format flags
97 | for k, v := range formatFlags {
98 | opts.FormatFlags[k] = *v
99 | }
100 |
101 | // Ensure the output directory ends with a path separator.
102 | if opts.OutputDirectory == "" {
103 | wd, err := os.Getwd()
104 | if err != nil {
105 | return fmt.Errorf("failed to get working directory: %w", err)
106 | }
107 |
108 | opts.OutputDirectory = wd + string(os.PathSeparator)
109 | } else if opts.OutputDirectory[len(opts.OutputDirectory)-1] != os.PathSeparator {
110 | opts.OutputDirectory += string(os.PathSeparator)
111 | }
112 |
113 | // Load Config
114 | cfg, err := config.LoadConfig(opts.ConfigFile)
115 | if err != nil {
116 | slog.Error("Failed to load config", "file", opts.ConfigFile, "error", err)
117 |
118 | return fmt.Errorf("failed to load config: %w", err)
119 | }
120 |
121 | // Determine active formats
122 | activeFormats := formats.GetFormats(opts.FormatFlags)
123 |
124 | downloaderInstance := downloader.NewDownloader(cfg, opts)
125 |
126 | ctx := context.Background()
127 |
128 | for _, format := range activeFormats {
129 | // Build the target file path for this format.
130 | // The previous kingpin-based CLI computed it as
131 | // `GetFilename(viper.GetString(viperOutputDirectoryKey), viper.GetString(viperElementKey))`;
132 | // here it is reconstructed from the parsed options instead.
133 |
134 | // `Downloader.DownloadFile` expects the full output path
135 | // (e.g. dir/elementID.ext) and resolves the format ID from the
136 | // loaded config, so the element ID is joined to the output
137 | // directory and the format extension is appended below.
138 |
139 | // Downloading and the optional checksum verification are
140 | // delegated to processDownload.
141 |
142 | // Construct the base filename (without extension).
143 | outFile := opts.OutputDirectory + elementID
144 | // Get format details for extension
145 | formatDef := cfg.Formats[format]
146 | targetFile := outFile + "." + formatDef.ID
147 |
148 | slog.Info("Processing", "element", elementID, "format", format)
149 |
150 | if err := processDownload(ctx, downloaderInstance, opts.Check, elementID, format, targetFile); err != nil {
151 | return err
152 | }
153 | }
154 |
155 | return nil
156 | }
157 |
158 | func processDownload(ctx context.Context, downloaderInstance *downloader.Downloader, check bool, elementID, format, targetFile string) error {
159 | if !check {
160 | if err := downloaderInstance.DownloadFile(ctx, elementID, format, targetFile); err != nil {
161 | return fmt.Errorf("download failed: %w", err)
162 | }
163 |
164 | return nil
165 | }
166 |
167 | shouldDownload := true
168 |
169 | if downloader.FileExist(targetFile) {
170 | if downloaderInstance.Checksum(ctx, elementID, format) {
171 | slog.Info("File already exists and checksum matches", "file", targetFile)
172 |
173 | shouldDownload = false
174 | } else {
175 | slog.Warn("Checksum mismatch or verification failed, re-downloading", "file", targetFile)
176 | }
177 | }
178 |
179 | if shouldDownload {
180 | if err := downloaderInstance.DownloadFile(ctx, elementID, format, targetFile); err != nil {
181 | return fmt.Errorf("download failed: %w", err)
182 | }
183 | // Verify again
184 | downloaderInstance.Checksum(ctx, elementID, format)
185 | }
186 |
187 | return nil
188 | }
189 |
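The command above is plain cobra wiring: one positional element argument plus a boolean flag per format, mirroring the original kingpin flags. Here is a minimal sketch of the same shape, independent of this package's globals; the flag names are illustrative and not the exact `formats.Key*` values.

```go
package main

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

func main() {
	var (
		outputDir string
		osmPBF    bool
	)

	cmd := &cobra.Command{
		Use:   "download [element]",
		Short: "Download an element",
		Args:  cobra.ExactArgs(1),
		RunE: func(_ *cobra.Command, args []string) error {
			// A real implementation would load the config and start the download here.
			fmt.Printf("element=%s output=%s osm.pbf=%v\n", args[0], outputDir, osmPBF)

			return nil
		},
	}

	cmd.Flags().StringVarP(&outputDir, "output-dir", "d", "", "Set output directory")
	cmd.Flags().BoolVarP(&osmPBF, "osm-pbf", "P", false, "Download osm.pbf (default)")

	if err := cmd.Execute(); err != nil {
		os.Exit(1)
	}
}
```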
--------------------------------------------------------------------------------
/internal/generator/importer/geofabrik/geofabrik.go:
--------------------------------------------------------------------------------
1 | package geofabrik
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "io"
8 | "log/slog"
9 | "net"
10 | "net/http"
11 | "os"
12 | "sync"
13 | "time"
14 |
15 | "github.com/julien-noblet/download-geofabrik/internal/config"
16 | "github.com/julien-noblet/download-geofabrik/internal/element"
17 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
18 | )
19 |
20 | var (
21 | GeofabrikIndexURL = `https://download.geofabrik.de/index-v1-nogeom.json`
22 | GeofabrikBaseURL = `https://download.geofabrik.de`
23 |
24 | // ErrDownload = "error while downloading %v, server returned code %d\nPlease use '%s generate' to re-create your yml file %w"
25 | // ErrCreatingRequest = "error while creating request for %s: %w"
26 | // ErrDownloading = "error while downloading %s: %w"
27 | // ErrReadingResponse = "error while reading response body: %w"
28 | // ErrUnmarshallingBody = "error while unmarshalling response body: %w"
29 | // ErrMergingElement = "error while merging element %v: %w".
30 |
31 | TimeoutDuration = 60 * time.Second
32 | KeepAliveDuration = 30 * time.Second
33 | IdleConnTimeout = 5 * time.Second
34 | TLSHandshakeTimeout = 10 * time.Second
35 | ExpectContinueTimeout = 5 * time.Second
36 | )
37 |
38 | // HTTPClient is a reusable HTTP client.
39 | var HTTPClient = &http.Client{
40 | Transport: &http.Transport{
41 | Proxy: http.ProxyFromEnvironment,
42 | DialContext: (&net.Dialer{
43 | Timeout: TimeoutDuration,
44 | KeepAlive: KeepAliveDuration,
45 | DualStack: true,
46 | }).DialContext,
47 | MaxIdleConns: 0,
48 | IdleConnTimeout: IdleConnTimeout,
49 | TLSHandshakeTimeout: TLSHandshakeTimeout,
50 | ExpectContinueTimeout: ExpectContinueTimeout,
51 | },
52 | }
53 |
54 | // FormatDefinition returns a map of format definitions.
55 | func FormatDefinition() formats.FormatDefinitions {
56 | return formats.FormatDefinitions{
57 | "osm.bz2.md5": {ID: "osm.bz2.md5", Loc: "-latest.osm.bz2.md5"},
58 | "osm.pbf.md5": {ID: "osm.pbf.md5", Loc: "-latest.osm.pbf.md5"},
59 | formats.FormatKml: {ID: formats.FormatKml, Loc: ".kml"},
60 | formats.FormatMBTiles: {ID: formats.FormatMBTiles, Loc: "-latest-free.mbtiles.zip", ToLoc: "latest-free.mbtiles.zip"},
61 | formats.FormatOsmBz2: {ID: formats.FormatOsmBz2, Loc: "-latest.osm.bz2"},
62 | formats.FormatOsmPbf: {ID: formats.FormatOsmPbf, Loc: "-latest.osm.pbf"},
63 | formats.FormatPoly: {ID: formats.FormatPoly, Loc: ".poly"},
64 | formats.FormatShpZip: {ID: formats.FormatShpZip, Loc: "-shortbread-1.0.mbtiles"},
65 | formats.FormatState: {ID: formats.FormatState, Loc: "-updates/state.txt"},
66 | }
67 | }
68 |
69 | // Index represents the structure of the Geofabrik index.
70 | type Index struct {
71 | Features []IndexElement `json:"features"`
72 | }
73 |
74 | // IndexElement represents an element in the Geofabrik index.
75 | type IndexElement struct {
76 | ElementProperties IndexElementProperties `json:"properties"`
77 | }
78 |
79 | // IndexElementProperties represents the properties of an index element.
80 | type IndexElementProperties struct {
81 | Urls map[string]string `json:"urls"`
82 | ID string `json:"id"`
83 | Name string `json:"name"`
84 | Parent string `json:"parent,omitempty"`
85 | Iso3166_1 []string `json:"iso3166-1:alpha2,omitempty"` //nolint:tagliatelle // That's geofabrik's field name
86 | Iso3166_2 []string `json:"iso3166-2,omitempty"` //nolint:tagliatelle // That's geofabrik's field name
87 | }
88 |
89 | // GetIndex downloads the Geofabrik index and unmarshals the JSON response.
90 | func GetIndex(url string) (*Index, error) {
91 | ctx, cancel := context.WithTimeout(context.Background(), TimeoutDuration)
92 | defer cancel()
93 |
94 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody)
95 | if err != nil {
96 | return nil, fmt.Errorf("error while creating request for %s: %w", url, err)
97 | }
98 |
99 | response, err := HTTPClient.Do(req)
100 | if err != nil {
101 | return nil, fmt.Errorf("error while downloading %s: %w", url, err)
102 | }
103 |
104 | defer func() {
105 | if cerr := response.Body.Close(); cerr != nil && err == nil {
106 | err = fmt.Errorf("close response: %w", cerr)
107 | }
108 | }()
109 |
110 | if response.StatusCode != http.StatusOK {
111 | return nil, handleHTTPError(response, url)
112 | }
113 |
114 | bodyBytes, err := io.ReadAll(response.Body)
115 | if err != nil {
116 | return nil, fmt.Errorf("error while reading response body: %w", err)
117 | }
118 |
119 | var geofabrikIndex Index
120 | if err := json.Unmarshal(bodyBytes, &geofabrikIndex); err != nil {
121 | return nil, fmt.Errorf("error while unmarshalling response body: %w", err)
122 | }
123 |
124 | return &geofabrikIndex, nil
125 | }
126 |
127 | // handleHTTPError handles HTTP errors based on the status code.
128 | func handleHTTPError(response *http.Response, url string) error {
129 | switch response.StatusCode {
130 | case http.StatusNotFound:
131 | return fmt.Errorf("error while downloading %s, server returned code %d\n"+
132 | "Please use '%s generate' to re-create your yml file %w", url, response.StatusCode, os.Args[0], http.ErrNoLocation)
133 |
134 | default:
135 | return fmt.Errorf("error while downloading %s, server returned code %d\n"+
136 | "Please use '%s generate' to re-create your yml file %w", url, response.StatusCode, os.Args[0], http.ErrNotSupported)
137 | }
138 | }
139 |
140 | // Convert converts the Geofabrik index to a config.Config object.
141 | func Convert(index *Index) (*config.Config, error) {
142 | cfg := &config.Config{
143 | Formats: FormatDefinition(),
144 | BaseURL: GeofabrikBaseURL,
145 | Elements: element.MapElement{},
146 | ElementsMutex: &sync.RWMutex{},
147 | }
148 |
149 | for _, feature := range index.Features {
150 | if err := processFeature(cfg, &feature); err != nil {
151 | return nil, err
152 | }
153 | }
154 |
155 | return cfg, nil
156 | }
157 |
158 | // processFeature processes a single feature from the Geofabrik index.
159 | func processFeature(cfg *config.Config, feature *IndexElement) error {
160 | var elem element.Element
161 |
162 | slog.Debug("Processing feature", "ID", feature.ElementProperties.ID)
163 |
164 | elem.ID = feature.ElementProperties.ID
165 | elem.Parent = feature.ElementProperties.Parent
166 | elem.Name = feature.ElementProperties.Name
167 |
168 | elem.Formats = append(elem.Formats, getFormats(feature.ElementProperties.Urls)...)
169 |
170 | if err := cfg.MergeElement(&elem); err != nil {
171 | return fmt.Errorf("error while merging element %v: %w", elem, err)
172 | }
173 |
174 | return nil
175 | }
176 |
177 | // getFormats returns the formats based on the URLs.
178 | func getFormats(urls map[string]string) []string {
179 | myFormats := []string{}
180 |
181 | for k := range urls {
182 | switch k {
183 | case "pbf":
184 | myFormats = append(myFormats, formats.FormatOsmPbf, "osm.pbf.md5")
185 | case "bz2":
186 | myFormats = append(myFormats, formats.FormatOsmBz2, "osm.bz2.md5")
187 | case "shp":
188 | myFormats = append(myFormats, formats.FormatShpZip)
189 | case "history":
190 | myFormats = append(myFormats, formats.FormatOshPbf)
191 | }
192 | }
193 |
194 | myFormats = append(myFormats, formats.FormatPoly, formats.FormatKml, formats.FormatState)
195 |
196 | return myFormats
197 | }
198 |
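Unlike the HTML scrapers, this importer reads Geofabrik's machine-readable index: each feature's `urls` keys decide which formats the element advertises. A small self-contained sketch of that decoding step against a trimmed, hard-coded JSON fragment instead of the live `index-v1-nogeom.json`:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
)

type properties struct {
	Urls   map[string]string `json:"urls"`
	ID     string            `json:"id"`
	Name   string            `json:"name"`
	Parent string            `json:"parent,omitempty"`
}

type feature struct {
	Properties properties `json:"properties"`
}

type index struct {
	Features []feature `json:"features"`
}

func main() {
	// A trimmed stand-in for index-v1-nogeom.json.
	raw := []byte(`{"features":[{"properties":{
		"id":"ile-de-france","name":"Ile-de-France","parent":"france",
		"urls":{"pbf":"https://download.geofabrik.de/europe/france/ile-de-france-latest.osm.pbf"}}}]}`)

	var idx index
	if err := json.Unmarshal(raw, &idx); err != nil {
		log.Fatal(err)
	}

	for _, f := range idx.Features {
		fmts := []string{"poly", "kml", "state"} // always added, as in getFormats above
		if _, ok := f.Properties.Urls["pbf"]; ok {
			fmts = append(fmts, "osm.pbf", "osm.pbf.md5")
		}

		fmt.Println(f.Properties.ID, "parent:", f.Properties.Parent, "formats:", fmts)
	}
}
```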
--------------------------------------------------------------------------------
/internal/cli/download_test.go:
--------------------------------------------------------------------------------
1 | package cli_test
2 |
3 | import (
4 | "os"
5 | "testing"
6 |
7 | "github.com/julien-noblet/download-geofabrik/internal/cli"
8 | "github.com/spf13/viper"
9 | "github.com/stretchr/testify/assert"
10 | "github.com/stretchr/testify/require"
11 | )
12 |
13 | func TestDownloadCmd_NoDownload(t *testing.T) {
14 | // Setup
15 | // Reset viper to avoid pollution
16 | viper.Reset()
17 |
18 | // downloadCmd and its flags live on package-level globals and are
19 | // registered on rootCmd by Execute(), so this test drives the real
20 | // command tree through Execute() instead of rebuilding it.
21 |
42 | // Create a temporary directory for output
43 | tmpDir := t.TempDir()
44 |
45 | // --nodownload avoids any network transfer: FromURL returns early when
46 | // Options.NoDownload is set. DownloadFile still resolves the element
47 | // (FindElem) and builds its URL (Elem2URL), so the config must contain
48 | // the requested element.
49 |
50 | // Create a minimal config file.
67 | configFile := tmpDir + "/geofabrik.yml"
68 |
69 | err := os.WriteFile(configFile, []byte(testConfigContent), 0o600)
70 | require.NoError(t, err)
71 |
72 | // Set flags
73 | cli.RootCmd.SetArgs([]string{"download", "test-elem", "--config", configFile, "--nodownload", "--output-dir", tmpDir})
74 |
75 | // Execute() registers the subcommands on rootCmd and then calls
76 | // rootCmd.Execute(). It takes no arguments and parses os.Args, so
77 | // os.Args is swapped for the duration of the test (restored by the
78 | // deferred function below) in addition to SetArgs above.
109 | oldArgs := os.Args
110 |
111 | defer func() { os.Args = oldArgs }()
112 |
113 | os.Args = []string{"download-geofabrik", "download", "test-elem", "--config", configFile, "--nodownload", "--output-dir", tmpDir}
114 |
115 | // Run Execute
116 | err = cli.Execute()
117 |
118 | // Assert
119 | assert.NoError(t, err)
120 | }
121 |
122 | func TestDownloadCmd_InvalidArgs(t *testing.T) {
123 | oldArgs := os.Args
124 |
125 | defer func() { os.Args = oldArgs }()
126 |
127 | // Missing element arg
128 | os.Args = []string{"download-geofabrik", "download"}
129 |
130 | // Execute
131 | // Note: cobra might print to stderr.
132 | err := cli.Execute()
133 |
134 | require.Error(t, err)
135 |
136 | // Test invalid arguments (missing element)
137 | // Need to set up a dummy config file for this test case as well
138 | tmpDir := t.TempDir()
139 |
140 | configFile := tmpDir + "/geofabrik.yml"
141 |
142 | err = os.WriteFile(configFile, []byte(testConfigContent), 0o600)
143 | require.NoError(t, err)
144 |
145 | cli.RootCmd.SetArgs([]string{"download", "--config", configFile})
146 |
147 | err = cli.Execute()
148 | require.Error(t, err)
149 | }
150 |
151 | func TestDownloadCmd_NoCheck(t *testing.T) {
152 | tmpDir := t.TempDir()
153 |
154 | configFile := tmpDir + "/geofabrik.yml"
155 |
156 | err := os.WriteFile(configFile, []byte(testConfigContent), 0o600)
157 | require.NoError(t, err)
158 |
159 | cli.RootCmd.SetArgs([]string{"download", "test-elem", "--config", configFile, "--nodownload", "--output-dir", tmpDir, "--check=false"})
160 |
161 | err = cli.Execute()
162 | require.NoError(t, err)
163 | }
164 |
165 | func TestDownloadCmd_FileExists(t *testing.T) {
166 | // Test file exists path
167 | tmpDir := t.TempDir()
168 |
169 | configFile := tmpDir + "/geofabrik.yml"
170 |
171 | err := os.WriteFile(configFile, []byte(testConfigContent), 0o600)
172 | require.NoError(t, err)
173 |
174 | // Create dummy file
175 | targetFile := tmpDir + "/test-elem.osm.pbf"
176 | err = os.WriteFile(targetFile, []byte("dummy content"), 0o600)
177 | require.NoError(t, err)
178 |
179 | cli.RootCmd.SetArgs([]string{"download", "test-elem", "--config", configFile, "--nodownload", "--output-dir", tmpDir})
180 |
181 | err = cli.Execute()
182 | require.NoError(t, err)
183 | }
184 |
185 | func TestDownloadCmd_DefaultOutputDir(t *testing.T) {
186 | // Test default output dir (current directory)
187 | tmpDir := t.TempDir()
188 |
189 | configFile := tmpDir + "/geofabrik.yml"
190 |
191 | err := os.WriteFile(configFile, []byte(testConfigContent), 0o600)
192 | require.NoError(t, err)
193 |
194 | t.Chdir(tmpDir)
195 |
196 | // The cli package keeps its flag values in package-level globals that
197 | // persist across tests in the same process, so reset them through the
198 | // ResetGlobs helper exported from export_test.go.
199 | cli.ResetGlobs()
200 |
201 | viper.Reset()
202 |
203 | // The outputDir flag variable in download.go is a package global too;
204 | // if it ever needs clearing between tests, ResetGlobs is the place to
205 | // do it.
210 |
211 | cli.RootCmd.SetArgs([]string{"download", "test-elem", "--config", configFile, "--nodownload"})
212 |
213 | err = cli.Execute()
214 | require.NoError(t, err)
215 | }
216 |
--------------------------------------------------------------------------------
/internal/config/config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "log/slog"
7 | "os"
8 | "path/filepath"
9 | "reflect"
10 | "strings"
11 | "sync"
12 |
13 | "github.com/julien-noblet/download-geofabrik/internal/element"
14 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
15 | "gopkg.in/yaml.v3"
16 | )
17 |
18 | const (
19 | DefaultConfigFile = "geofabrik.yml"
20 | DefaultService = "geofabrik"
21 | )
22 |
23 | var (
24 | ErrElem2URL = errors.New("can't find url")
25 | ErrLoadConfig = errors.New("can't load config")
26 | ErrFindElem = errors.New("element not found")
27 | ErrParentMismatch = errors.New("can't merge")
28 | ErrFormatNotExist = errors.New("format not exist")
29 |
30 | hashes = []string{"md5"}
31 | )
32 |
33 | // Config structure handles all elements and formats from the YAML database.
34 | type Config struct {
35 | Formats formats.FormatDefinitions `yaml:"formats"`
36 | Elements element.MapElement `yaml:"elements"`
37 | ElementsMutex *sync.RWMutex `yaml:"-"` // unexported
38 | BaseURL string `yaml:"baseURL"` //nolint:tagliatelle // external yaml requirement
39 | }
40 |
41 | // Options holds runtime configuration (flags).
42 | // Field alignment optimized.
43 | type Options struct {
44 | FormatFlags map[string]bool
45 | ConfigFile string
46 | Service string
47 | OutputDirectory string
48 | Check bool
49 | Verbose bool
50 | Quiet bool
51 | NoDownload bool
52 | Progress bool
53 | }
54 |
55 | // Generate Yaml config.
56 | func (config *Config) Generate() ([]byte, error) {
57 | yml, err := yaml.Marshal(config)
58 | if err != nil {
59 | return nil, fmt.Errorf("failed to Marshal: %w", err)
60 | }
61 |
62 | return yml, nil
63 | }
64 |
65 | // MergeElement merges a new element into the config.
66 | func (config *Config) MergeElement(elementPtr *element.Element) error {
67 | config.ElementsMutex.RLock()
68 | newElement, ok := config.Elements[elementPtr.ID]
69 | config.ElementsMutex.RUnlock()
70 |
71 | if ok {
72 | if newElement.Parent != elementPtr.Parent {
73 | return fmt.Errorf("%w: Parent mismatch %s != %s (%s)", ErrParentMismatch, newElement.Parent, elementPtr.Parent, elementPtr.ID)
74 | }
75 |
76 | config.ElementsMutex.Lock()
77 | defer config.ElementsMutex.Unlock()
78 |
79 | for _, f := range elementPtr.Formats {
80 | if !newElement.Formats.Contains(f) {
81 | newElement.Formats = append(newElement.Formats, f)
82 | }
83 | }
84 |
85 | newElement.Meta = len(newElement.Formats) == 0
86 | config.Elements[elementPtr.ID] = newElement
87 | } else {
88 | config.ElementsMutex.Lock()
89 | defer config.ElementsMutex.Unlock()
90 |
91 | config.Elements[elementPtr.ID] = *elementPtr
92 | }
93 |
94 | return nil
95 | }
96 |
97 | // Exist checks if an element with the given ID exists in the config.
98 | func (config *Config) Exist(elementID string) bool {
99 | config.ElementsMutex.RLock()
100 | defer config.ElementsMutex.RUnlock()
101 |
102 | result := reflect.DeepEqual(config.Elements[elementID], element.Element{})
103 |
104 | return !result
105 | }
106 |
107 | // AddExtension adds an extension to an element.
108 | func (config *Config) AddExtension(elementID, format string) {
109 | config.ElementsMutex.RLock()
110 | elem := config.Elements[elementID]
111 | config.ElementsMutex.RUnlock()
112 |
113 | if !elem.Formats.Contains(format) {
114 | slog.Info("Add extension to element", "format", format, "id", elem.ID)
115 |
116 | config.ElementsMutex.Lock()
117 |
118 | elem.Formats = append(elem.Formats, format)
119 |
120 | config.ElementsMutex.Unlock()
121 |
122 | if err := config.MergeElement(&elem); err != nil {
123 | slog.Error("can't merge element", "error", err, "name", elem.Name)
124 | os.Exit(1) // TODO: propagate the error to the caller instead of exiting here.
125 | }
126 | }
127 | }
128 |
129 | // GetElement gets an element by ID or returns an error if not found.
130 | func (config *Config) GetElement(elementID string) (*element.Element, error) {
131 | if config.Exist(elementID) {
132 | config.ElementsMutex.RLock()
133 | r := config.Elements[elementID]
134 | config.ElementsMutex.RUnlock()
135 |
136 | return &r, nil
137 | }
138 |
139 | return nil, fmt.Errorf("%w: %s", ErrFindElem, elementID)
140 | }
141 |
142 | // FindElem finds an element in the config by ID.
143 | func FindElem(config *Config, e string) (*element.Element, error) {
144 | res := config.Elements[e]
145 | if res.ID == "" || res.ID != e {
146 | return nil, fmt.Errorf("%w: %s is not in config. Please use \"list\" command", ErrFindElem, e)
147 | }
148 |
149 | return &res, nil
150 | }
151 |
152 | // GetFile gets the file name of an element.
153 | func GetFile(myElement *element.Element) string {
154 | if myElement.File != "" {
155 | return myElement.File
156 | }
157 |
158 | return myElement.ID
159 | }
160 |
161 | // Elem2preURL generates a pre-URL for an element.
162 | func Elem2preURL(config *Config, elementPtr *element.Element, baseURL ...string) (string, error) {
163 | myElement, err := FindElem(config, elementPtr.ID)
164 | if err != nil {
165 | return "", err
166 | }
167 |
168 | if myElement.HasParent() {
169 | parent, err := FindElem(config, myElement.Parent)
170 | if err != nil {
171 | return "", err
172 | }
173 |
174 | res, err := Elem2preURL(config, parent, baseURL...)
175 | if err != nil {
176 | return "", err
177 | }
178 |
179 | res += "/" + GetFile(myElement)
180 |
181 | return res, nil
182 | }
183 |
184 | switch len(baseURL) {
185 | case 1:
186 | prefix := config.BaseURL + "/" + strings.Join(baseURL, "/")
187 | if !strings.HasSuffix(prefix, "/") {
188 | prefix += "/"
189 | }
190 |
191 | return prefix + GetFile(myElement), nil
192 |
193 | case 2: //nolint:mnd // This case handles exactly 2 base URL components
194 | prefix := strings.Join(baseURL, "/")
195 | if !strings.HasSuffix(prefix, "/") {
196 | prefix += "/"
197 | }
198 |
199 | return prefix + GetFile(myElement), nil
200 |
201 | default:
202 | return config.BaseURL + "/" + GetFile(myElement), nil
203 | }
204 | }
205 |
206 | // Elem2URL generates a URL for an element with the given extension.
207 | func Elem2URL(config *Config, elementPtr *element.Element, ext string) (string, error) {
208 | if !elementPtr.Formats.Contains(ext) {
209 | return "", fmt.Errorf("%w: %s", ErrFormatNotExist, ext)
210 | }
211 |
212 | format := config.Formats[ext]
213 | baseURL, basePath := format.BaseURL, format.BasePath
214 |
215 | if baseURL == "" {
216 | baseURL = config.BaseURL
217 | }
218 |
219 | res, err := Elem2preURL(config, elementPtr, baseURL, basePath)
220 | if err != nil {
221 | return "", err
222 | }
223 |
224 | return res + format.Loc, nil
225 | }
226 |
227 | // LoadConfig loads the configuration from the specified file.
228 | func LoadConfig(configFile string) (*Config, error) {
229 | filename, _ := filepath.Abs(configFile)
230 |
231 | fileContent, err := os.ReadFile(filename)
232 | if err != nil {
233 | return nil, fmt.Errorf("can't open %s: %w", filename, err)
234 | }
235 |
236 | myConfigPtr := &Config{
237 | Formats: formats.FormatDefinitions{},
238 | Elements: element.MapElement{},
239 | ElementsMutex: &sync.RWMutex{},
240 | BaseURL: "",
241 | }
242 |
243 | if err := yaml.Unmarshal(fileContent, myConfigPtr); err != nil {
244 | return nil, fmt.Errorf("can't unmarshal %s: %w", filename, err)
245 | }
246 |
247 | return myConfigPtr, nil
248 | }
249 |
250 | // IsHashable checks if a format is hashable.
251 | func IsHashable(config *Config, format string) (isHashable bool, hash, extension string) {
252 | if _, ok := config.Formats[format]; ok {
253 | for _, h := range hashes {
254 | hash := format + "." + h
255 | if _, ok := config.Formats[hash]; ok {
256 | return true, hash, h
257 | }
258 | }
259 | }
260 |
261 | return false, "", ""
262 | }
263 |
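`Elem2preURL` builds URLs by walking the parent chain: each ancestor contributes its file name, and `Elem2URL` then appends the format's `Loc` suffix. A standalone sketch of that recursion over a tiny in-memory tree, ignoring the per-format `BaseURL`/`BasePath` overrides handled above:

```go
package main

import "fmt"

type node struct {
	parent string
	file   string
}

// tree is a tiny stand-in for the elements section of geofabrik.yml.
var tree = map[string]node{
	"europe":        {parent: "", file: "europe"},
	"france":        {parent: "europe", file: "france"},
	"ile-de-france": {parent: "france", file: "ile-de-france"},
}

// preURL mirrors the recursion in Elem2preURL: prepend each ancestor's file name.
func preURL(base, id string) string {
	n := tree[id]
	if n.parent == "" {
		return base + "/" + n.file
	}

	return preURL(base, n.parent) + "/" + n.file
}

func main() {
	base := "https://download.geofabrik.de"
	loc := "-latest.osm.pbf" // the format's Loc suffix

	fmt.Println(preURL(base, "ile-de-france") + loc)
	// https://download.geofabrik.de/europe/france/ile-de-france-latest.osm.pbf
}
```

The recursion stops at a root element with no parent, which is why meta elements higher up the tree carry no formats of their own.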
--------------------------------------------------------------------------------
/internal/scrapper/geo2day/geo2day_test.go:
--------------------------------------------------------------------------------
1 | package geo2day_test
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "net/http/httptest"
7 | "reflect"
8 | "testing"
9 |
10 | "github.com/julien-noblet/download-geofabrik/internal/element"
11 | "github.com/julien-noblet/download-geofabrik/internal/scrapper/geo2day"
12 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
13 | "github.com/stretchr/testify/assert"
14 | "github.com/stretchr/testify/require"
15 | )
16 |
17 | func TestGeo2day_Exceptions(t *testing.T) {
18 | t.Parallel()
19 |
20 | type args struct {
21 | e element.Element
22 | }
23 |
24 | tests := []struct {
25 | name string
26 | args args
27 | want element.Element
28 | }{
29 | // TODO: Add test cases.
30 | {
31 | name: "la_rioja in argentina",
32 | args: args{
33 | e: element.Element{
34 | ID: "la_rioja",
35 | Parent: "argentina",
36 | File: "la_rioja",
37 | },
38 | },
39 | want: element.Element{
40 | ID: "la_rioja-argentina",
41 | Parent: "argentina",
42 | File: "la_rioja",
43 | },
44 | },
45 | {
46 | name: "la_rioja in spain",
47 | args: args{
48 | e: element.Element{
49 | ID: "la_rioja",
50 | Parent: "spain",
51 | File: "la_rioja",
52 | },
53 | },
54 | want: element.Element{
55 | ID: "la_rioja-spain",
56 | Parent: "spain",
57 | File: "la_rioja",
58 | },
59 | },
60 | {
61 | name: "guyane in france",
62 | args: args{
63 | e: element.Element{
64 | ID: "guyane",
65 | Parent: "france",
66 | File: "guyane",
67 | },
68 | },
69 | want: element.Element{
70 | ID: "guyane-france",
71 | Parent: "france",
72 | File: "guyane",
73 | },
74 | },
75 | {
76 | name: "guyane in south-america",
77 | args: args{
78 | e: element.Element{
79 | ID: "guyane",
80 | Parent: "south-america",
81 | File: "guyane",
82 | },
83 | },
84 | want: element.Element{
85 | ID: "guyane-south-america",
86 | Parent: "south-america",
87 | File: "guyane",
88 | },
89 | },
90 | {
91 | name: "sevastopol in ukraine",
92 | args: args{
93 | e: element.Element{
94 | ID: "sevastopol",
95 | Parent: "ukraine",
96 | File: "sevastopol",
97 | },
98 | },
99 | want: element.Element{
100 | ID: "sevastopol-ukraine",
101 | Parent: "ukraine",
102 | File: "sevastopol",
103 | },
104 | },
105 | {
106 | name: "sevastopol in russia",
107 | args: args{
108 | e: element.Element{
109 | ID: "sevastopol",
110 | Parent: "russia",
111 | File: "sevastopol",
112 | },
113 | },
114 | want: element.Element{
115 | ID: "sevastopol-russia",
116 | Parent: "russia",
117 | File: "sevastopol",
118 | },
119 | },
120 | {
121 | name: "limburg in netherlands",
122 | args: args{
123 | e: element.Element{
124 | ID: "limburg",
125 | Parent: "netherlands",
126 | File: "limburg",
127 | },
128 | },
129 | want: element.Element{
130 | ID: "limburg-netherlands",
131 | Parent: "netherlands",
132 | File: "limburg",
133 | },
134 | },
135 | {
136 | name: "limburg in flanders",
137 | args: args{
138 | e: element.Element{
139 | ID: "limburg",
140 | Parent: "flanders",
141 | File: "limburg",
142 | },
143 | },
144 | want: element.Element{
145 | ID: "limburg-flanders",
146 | Parent: "flanders",
147 | File: "limburg",
148 | },
149 | },
150 | {
151 | name: "cordoba in argentina",
152 | args: args{
153 | e: element.Element{
154 | ID: "cordoba",
155 | Parent: "argentina",
156 | File: "cordoba",
157 | },
158 | },
159 | want: element.Element{
160 | ID: "cordoba-argentina",
161 | Parent: "argentina",
162 | File: "cordoba",
163 | },
164 | },
165 | {
166 | name: "cordoba in andalucia",
167 | args: args{
168 | e: element.Element{
169 | ID: "cordoba",
170 | Parent: "andalucia",
171 | File: "cordoba",
172 | },
173 | },
174 | want: element.Element{
175 | ID: "cordoba-andalucia",
176 | Parent: "andalucia",
177 | File: "cordoba",
178 | },
179 | },
180 | {
181 | name: "georgia in usa",
182 | args: args{
183 | e: element.Element{
184 | ID: "georgia",
185 | Parent: "us",
186 | File: "georgia",
187 | },
188 | },
189 | want: element.Element{
190 | ID: "georgia-us",
191 | Parent: "us",
192 | File: "georgia",
193 | },
194 | },
195 | {
196 | name: "georgia in asia",
197 | args: args{
198 | e: element.Element{
199 | ID: "georgia",
200 | Parent: "asia",
201 | File: "georgia",
202 | },
203 | },
204 | want: element.Element{
205 | ID: "georgia-asia",
206 | Parent: "asia",
207 | File: "georgia",
208 | },
209 | },
210 | {
211 | name: "france is not in the list",
212 | args: args{
213 | e: element.Element{
214 | ID: "france",
215 | Parent: "europe",
216 | File: "france",
217 | },
218 | },
219 | want: element.Element{
220 | ID: "france",
221 | Parent: "europe",
222 | File: "france",
223 | },
224 | },
225 | }
226 |
227 | for _, tt := range tests {
228 | t.Run(tt.name, func(t *testing.T) {
229 | t.Parallel()
230 |
231 | g := geo2day.GetDefault()
232 |
233 | if got := g.Exceptions(&tt.args.e); !reflect.DeepEqual(got, &tt.want) {
234 | t.Errorf("Geo2day.Exceptions() = %v, want %v", *got, tt.want)
235 | }
236 | })
237 | }
238 | }
239 |
240 | func TestGeo2day_Collector(t *testing.T) {
241 | // Mock server
242 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
243 | switch r.URL.Path {
244 | case "/":
245 | fmt.Fprintln(w, `
246 |
247 |
248 |
254 |
257 |
258 |
259 | `)
260 |
261 | case "/europe.html":
262 | fmt.Fprintln(w, `
263 |
264 |
265 |
271 |
272 |
273 | `)
274 |
275 | case "/europe/france.html":
276 | fmt.Fprintln(w, `
277 |
278 |
279 |
284 |
285 |
286 | `)
287 |
288 | default:
289 | http.NotFound(w, r)
290 | }
291 | }))
292 | defer ts.Close()
293 |
294 | g := geo2day.GetDefault()
295 | g.BaseURL = ts.URL
296 | g.StartURL = ts.URL + "/"
297 | g.AllowedDomains = nil // Allow all domains (specifically the test server)
298 | g.URLFilters = nil // Disable URL filters to allow test server URLs
299 | g.Parallelism = 1 // Process sequentially to avoid race in test logic if any (though map access needs sync)
300 |
301 | c := g.Collector()
302 |
303 | // Visit the start URL
304 | err := c.Visit(g.StartURL)
305 | require.NoError(t, err)
306 | c.Wait()
307 |
308 | // Helper to get element by key safely
309 | getElement := func(key string) (element.Element, bool) {
310 | g.Config.ElementsMutex.RLock()
311 | defer g.Config.ElementsMutex.RUnlock() // Defer unlock
312 |
313 | el, ok := g.Config.Elements[key]
314 |
315 | return el, ok
316 | }
317 |
318 | // 1. Check Planet PBF (Attached to root element "")
319 | rootEl, found := getElement("")
320 | assert.True(t, found, "root element should exist")
321 |
322 | if found {
323 | assert.Contains(t, rootEl.Formats, formats.FormatOsmPbf)
324 | }
325 |
326 | // 2. Check Europe (HTML)
327 | europe, found := getElement("europe")
328 | assert.True(t, found, "europe element should exist")
329 | assert.Equal(t, "europe", europe.ID)
330 | // 3. Check traversal to France
331 | france, found := getElement("france")
332 | assert.True(t, found, "france element should exist")
333 |
334 | if found {
335 | assert.Equal(t, "france", france.ID)
336 | assert.Equal(t, "europe", france.Parent)
337 | }
338 |
339 | // 4. Check contents within France (Ile-de-france)
340 | // ID "ile-de-france"
341 | idf, found := getElement("ile-de-france")
342 | assert.True(t, found, "ile-de-france element should exist")
343 |
344 | if found {
345 | // Parent should be "france"
346 | assert.Equal(t, "france", idf.Parent)
347 | }
348 | }
349 |
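The Collector test drives the scraper against an `httptest` server so no real network traffic is involved. A standalone sketch of that pattern with gocolly follows; the served markup and the CSS selector are illustrative, not geo2day.com's actual page structure.

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"

	"github.com/gocolly/colly/v2"
)

func main() {
	// Serve a tiny page so the crawl never leaves the test process.
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprintln(w, `<table><tr><td><a href="/europe.html">Europe</a></td></tr></table>`)
	}))
	defer ts.Close()

	// No domain restrictions: the test server URL is random.
	c := colly.NewCollector()

	c.OnHTML("td a[href]", func(e *colly.HTMLElement) {
		fmt.Println("found subregion link:", e.Request.AbsoluteURL(e.Attr("href")))
	})

	if err := c.Visit(ts.URL + "/"); err != nil {
		fmt.Println("visit failed:", err)
	}
}
```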
--------------------------------------------------------------------------------
/internal/scrapper/openstreetmapfr/openstreetmapfr.go:
--------------------------------------------------------------------------------
1 | package openstreetmapfr
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "log/slog"
7 | "regexp"
8 | "strings"
9 | "time"
10 |
11 | "github.com/gocolly/colly/v2"
12 | "github.com/julien-noblet/download-geofabrik/internal/element"
13 | "github.com/julien-noblet/download-geofabrik/internal/scrapper"
14 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
15 | )
16 |
17 | // OpenstreetmapFR Scrapper.
18 | type OpenstreetmapFR struct {
19 | *scrapper.Scrapper
20 | }
21 |
22 | const (
23 | defaultTimeout = time.Second * 30
24 | passList = "HEADER"
25 | nbElmt = 1196 // Number of elements in openstreetmap.fr
26 | parallelism = 20 // use 20 routines to scrape openstreetmap.fr
27 | randomDelay = time.Second * 5 // Random delay between 0 and 5 seconds
28 | minParentListLength = 4
29 | )
30 |
31 | var exceptionList = map[string]struct{}{
32 | "central": {},
33 | "central-east": {},
34 | "central-north": {},
35 | "central-south": {},
36 | "central-west": {},
37 | "central_east": {},
38 | "central_north": {},
39 | "central_south": {},
40 | "central_west": {},
41 | "coastral": {},
42 | "east": {},
43 | "east_central": {},
44 | "east-central": {},
45 | "eastern": {},
46 | "lake": {},
47 | "north": {},
48 | "north_central": {},
49 | "north-central": {},
50 | "north-east": {},
51 | "north-eastern": {},
52 | "north-west": {},
53 | "north-western": {},
54 | "north_east": {},
55 | "north_eastern": {},
56 | "north_west": {},
57 | "north_western": {},
58 | "northeast": {},
59 | "northern": {},
60 | "northwest": {},
61 | "south": {},
62 | "south_central": {},
63 | "south-central": {},
64 | "south-east": {},
65 | "south-south": {},
66 | "south-west": {},
67 | "south_east": {},
68 | "south_south": {},
69 | "south_west": {},
70 | "southeast": {},
71 | "southern": {},
72 | "southwest": {},
73 | "west": {},
74 | "west_central": {},
75 | "west-central": {},
76 | "western": {},
77 | "france_taaf": {},
78 | "sevastopol": {},
79 | "la_rioja": {},
80 | "jura": {},
81 | "santa_cruz": {},
82 | }
83 |
84 | // GetDefault returns a default instance of OpenstreetmapFR.
85 | func GetDefault() *OpenstreetmapFR {
86 | timeout := defaultTimeout
87 |
88 | return &OpenstreetmapFR{
89 | Scrapper: &scrapper.Scrapper{
90 | PB: nbElmt,
91 | Async: true,
92 | Parallelism: parallelism,
93 | MaxDepth: 0,
94 | AllowedDomains: []string{`download.openstreetmap.fr`},
95 | BaseURL: `https://download.openstreetmap.fr/extracts`,
96 | StartURL: `https://download.openstreetmap.fr/`,
97 | URLFilters: []*regexp.Regexp{
98 | regexp.MustCompile(`https://download\.openstreetmap\.fr/$`),
99 | regexp.MustCompile(`https://download\.openstreetmap\.fr/extracts/(\w.+|)$`), //nolint:gocritic // This is a valid regexp
100 | regexp.MustCompile(`https://download\.openstreetmap\.fr/polygons/(\w.+|)$`), //nolint:gocritic // This is a valid regexp
101 | regexp.MustCompile(`https://download.openstreetmap.fr/cgi-bin/^(.*)$`), //nolint:gocritic // This is a valid regexp
102 | regexp.MustCompile(`https://download.openstreetmap.fr/replication/^(.*|)$`), //nolint:gocritic // This is a valid regexp
103 | },
104 | FormatDefinition: formats.FormatDefinitions{
105 | "osm.pbf.md5": {ID: "osm.pbf.md5", Loc: "-latest.osm.pbf.md5", ToLoc: "", BasePath: "", BaseURL: ""},
106 | formats.FormatOsmPbf: {ID: formats.FormatOsmPbf, Loc: "-latest.osm.pbf", ToLoc: "", BasePath: "", BaseURL: ""},
107 | formats.FormatPoly: {ID: formats.FormatPoly, Loc: ".poly", ToLoc: "", BasePath: "../polygons/", BaseURL: ""},
108 | formats.FormatState: {ID: formats.FormatState, Loc: ".state.txt", ToLoc: "", BasePath: "", BaseURL: ""},
109 | },
110 | Timeout: timeout,
111 | DomainGlob: "*",
112 | RandomDelay: randomDelay,
113 | },
114 | }
115 | }
116 |
117 | // Collector returns a Colly collector for OpenstreetmapFR.
118 | func (o *OpenstreetmapFR) Collector() *colly.Collector {
119 | c := o.Scrapper.Collector()
120 | c.OnHTML("a", func(e *colly.HTMLElement) {
121 | o.Parse(e, c)
122 | })
123 |
124 | return c
125 | }
126 |
127 | // GetParent returns the parent and the list of parents from a given href.
128 | func GetParent(href string) (parent string, parentList []string) {
129 | // Remove the last / from the href to avoid empty string in the parent list
130 | href = strings.TrimSuffix(href, "/")
131 |
132 | parentList = strings.Split(href, "/")
133 | if len(parentList) > minParentListLength {
134 | parent = parentList[len(parentList)-2]
135 | } else {
136 | parent = ""
137 | }
138 |
139 | if strings.EqualFold(parent, "extracts") || strings.EqualFold(parent, "polygons") {
140 | parent = ""
141 | }
142 |
143 | return parent, parentList
144 | }
145 |
146 | // MakeParents creates parent elements recursively.
147 | func (o *OpenstreetmapFR) MakeParents(parent string, gparents []string) {
148 | if parent == "" {
149 | return
150 | }
151 |
152 | gparent := getGparent(gparents)
153 |
154 | if !o.Config.Exist(parent) {
155 | o.createAndMergeElement(parent, gparent)
156 |
157 | if gparent != "" {
158 | o.MakeParents(gparent, gparents[:len(gparents)-1])
159 | }
160 | }
161 | }
162 |
163 | // getGparent returns the grandparent from a list of parents.
164 | func getGparent(gparents []string) string {
165 | if len(gparents) < minParentListLength {
166 | return ""
167 | }
168 |
169 | gparent := gparents[len(gparents)-3]
170 | if gparent == "http:" || gparent == "openstreetmap.fr" || gparent == "extracts" || gparent == "polygons" {
171 | return ""
172 | }
173 |
174 | return gparent
175 | }
176 |
177 | // createAndMergeElement creates and merges an element into the configuration.
178 | func (o *OpenstreetmapFR) createAndMergeElement(parent, gparent string) {
179 | myElement := element.Element{
180 | Parent: gparent,
181 | Name: parent,
182 | ID: parent,
183 | Formats: []string{},
184 | File: "",
185 | Meta: true,
186 | }
187 |
188 | if err := o.Config.MergeElement(&myElement); err != nil {
189 | slog.Error("Can't merge", "name", myElement.Name, "error", err)
190 | }
191 | }
192 |
193 | // Exceptions prefixes ambiguous names from the exception list (e.g. "north", "east") with their parent; other names are returned unchanged.
194 | func Exceptions(name, parent string) string {
195 | if _, exists := exceptionList[name]; exists {
196 | return fmt.Sprintf("%v_%v", parent, name)
197 | }
198 |
199 | return name
200 | }
201 |
202 | // ParseHref parses the href and updates the configuration.
203 | func (o *OpenstreetmapFR) ParseHref(href string) {
204 | slog.Debug("Parsing", "href", href)
205 |
206 | if strings.Contains(href, "?") || strings.Contains(href, "-latest") || href[0] == '/' {
207 | return
208 | }
209 |
210 | parent, parents := GetParent(href)
211 | if !o.Config.Exist(parent) {
212 | o.MakeParents(parent, parents)
213 | }
214 |
215 | valsplit := strings.Split(parents[len(parents)-1], ".")
216 | if valsplit[0] == "" || len(strings.Split(href, "/")) <= minParentListLength {
217 | return
218 | }
219 |
220 | if strings.Contains(passList, valsplit[0]) {
221 | return
222 | }
223 |
224 | name := Exceptions(valsplit[0], parent)
225 | slog.Debug("Parsing", "name", name)
226 |
227 | extension := strings.Join(valsplit[1:], ".")
228 | if strings.Contains(extension, "state.txt") {
229 | extension = formats.FormatState
230 | }
231 |
232 | slog.Debug("Add format", "extension", extension)
233 |
234 | file := ""
235 | if extension != "" {
236 | file = valsplit[0]
237 | }
238 |
239 | o.addOrUpdateElement(parent, name, file, extension)
240 | }
241 |
242 | // addOrUpdateElement adds or updates an element in the configuration.
243 | func (o *OpenstreetmapFR) addOrUpdateElement(parent, name, file, extension string) {
244 | myElement := element.Element{
245 | ID: name,
246 | File: file,
247 | Name: name,
248 | Parent: parent,
249 | Formats: []string{},
250 | Meta: false,
251 | }
252 |
253 | if extension == "" {
254 | myElement.File = ""
255 | myElement.Meta = true
256 | }
257 |
258 | if !o.Config.Exist(name) {
259 | if extension != "" {
260 | myElement.Formats = append(myElement.Formats, extension)
261 | }
262 |
263 | if err := o.Config.MergeElement(&myElement); err != nil {
264 | slog.Error("Can't merge", "name", myElement.Name, "error", err)
265 | }
266 | } else {
267 | slog.Debug("Already exist, merging formats", "name", name)
268 |
269 | if extension != "" {
270 | o.Config.AddExtension(name, extension)
271 | }
272 | }
273 | }
274 |
275 | // Parse parses the HTML element and visits the URL if it's a directory.
276 | func (o *OpenstreetmapFR) Parse(e *colly.HTMLElement, c *colly.Collector) {
277 | href := e.Request.AbsoluteURL(e.Attr("href"))
278 | if isDirectory(href) {
279 | slog.Debug("Next", "href", href)
280 | visitURL(c, href)
281 | } else {
282 | o.ParseHref(href)
283 | }
284 | }
285 |
286 | // isDirectory checks if the URL is a directory.
287 | func isDirectory(href string) bool {
288 | return href[len(href)-1] == '/'
289 | }
290 |
291 | // visitURL visits the URL and handles errors.
292 | func visitURL(c *colly.Collector, href string) {
293 | if err := c.Visit(href); err != nil && !errors.Is(err, &colly.AlreadyVisitedError{}) {
294 | if !errors.Is(err, colly.ErrNoURLFiltersMatch) {
295 | slog.Error("Can't get url", "error", err)
296 | } else {
297 | slog.Debug("URL filtered", "url", href)
298 | }
299 | }
300 | }
301 |
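
The interplay between GetParent and Exceptions above is easiest to see on a concrete URL. The sketch below is a minimal Example test that could sit alongside this package; the URL and the expected output are illustrative assumptions, not taken from the repository's own tests.

```go
package openstreetmapfr

import "fmt"

// ExampleExceptions walks one URL through GetParent and Exceptions: the parent
// is the second-to-last path segment, and the ambiguous name "north" is
// disambiguated by prefixing it with that parent.
func ExampleExceptions() {
	href := "https://download.openstreetmap.fr/extracts/africa/tanzania/north.osm.pbf"

	parent, _ := GetParent(href) // "tanzania"
	name := Exceptions("north", parent)

	fmt.Println(name)
	// Output: tanzania_north
}
```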
--------------------------------------------------------------------------------
/internal/generator/generator_test.go:
--------------------------------------------------------------------------------
1 | package generator //nolint:testpackage // testing internal functions
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "net/http/httptest"
7 | "os"
8 | "path/filepath"
9 | "reflect"
10 | "sync"
11 | "testing"
12 |
13 | "github.com/gocolly/colly/v2"
14 | "github.com/julien-noblet/download-geofabrik/internal/config"
15 | "github.com/julien-noblet/download-geofabrik/internal/element"
16 | "github.com/julien-noblet/download-geofabrik/internal/generator/importer/geofabrik"
17 | "github.com/julien-noblet/download-geofabrik/pkg/formats"
18 | "github.com/stretchr/testify/assert"
19 | "github.com/stretchr/testify/require"
20 | yaml "gopkg.in/yaml.v3"
21 | )
22 |
23 | // MockScrapper implements scrapper.IScrapper.
24 | type MockScrapper struct {
25 | BaseURL string
26 | StartURL string
27 | PB int
28 | }
29 |
30 | func (m *MockScrapper) GetConfig() *config.Config {
31 | return &config.Config{
32 | Elements: element.MapElement{},
33 | ElementsMutex: &sync.RWMutex{},
34 | }
35 | }
36 |
37 | func (m *MockScrapper) Collector() *colly.Collector {
 38 | 	// Return a plain collector; tests must not hit the real network,
 39 | 	// so callers point it at a local httptest server when needed.
40 | return colly.NewCollector()
41 | }
42 |
43 | func (m *MockScrapper) Limit() *colly.LimitRule {
44 | return &colly.LimitRule{}
45 | }
46 |
47 | func (m *MockScrapper) GetPB() int {
48 | return m.PB
49 | }
50 |
51 | func (m *MockScrapper) GetStartURL() string {
52 | return m.StartURL
53 | }
54 | func (m *MockScrapper) ParseFormat(_, _ string) {}
55 |
56 | func sampleAfricaElementPtr() *element.Element {
57 | return &element.Element{
58 | ID: "africa",
59 | Name: "Africa",
60 | Formats: []string{
61 | formats.FormatOsmPbf,
62 | "osm.pbf.md5",
63 | formats.FormatOsmBz2,
64 | "osm.bz2.md5",
65 | formats.FormatOshPbf,
66 | "osh.pbf.md5",
67 | formats.FormatPoly,
68 | formats.FormatKml,
69 | formats.FormatState,
70 | },
71 | }
72 | }
73 |
74 | func sampleGeorgiaUsElementPtr() *element.Element {
75 | return &element.Element{
76 | ID: "georgia-us",
77 | File: "georgia",
78 | Name: "Georgia (US State)",
79 | Formats: []string{
80 | formats.FormatOsmPbf,
81 | "osm.pbf.md5",
82 | formats.FormatShpZip,
83 | formats.FormatOsmBz2,
84 | "osm.bz2.md5",
85 | formats.FormatOshPbf,
86 | "osh.pbf.md5",
87 | formats.FormatPoly,
88 | formats.FormatKml,
89 | formats.FormatState,
90 | },
91 | Parent: "us",
92 | }
93 | }
94 |
95 | func sampleUsElementPtr() *element.Element {
96 | return &element.Element{
97 | ID: "us",
98 | Meta: true,
99 | Name: "United States of America",
100 | Parent: "north-america",
101 | }
102 | }
103 |
104 | func sampleNorthAmericaElementPtr() *element.Element {
105 | return &element.Element{
106 | ID: "north-america",
107 | Name: "North America",
108 | Formats: []string{
109 | formats.FormatOsmPbf,
110 | "osm.pbf.md5",
111 | formats.FormatOsmBz2,
112 | "osm.bz2.md5",
113 | formats.FormatOshPbf,
114 | "osh.pbf.md5",
115 | formats.FormatPoly,
116 | formats.FormatKml,
117 | formats.FormatState,
118 | },
119 | }
120 | }
121 |
122 | func sampleElementValidPtr() map[string]element.Element {
123 | return map[string]element.Element{
124 | "africa": *sampleAfricaElementPtr(),
125 | "georgia-us": *sampleGeorgiaUsElementPtr(),
126 | "us": *sampleUsElementPtr(),
127 | "north-america": *sampleNorthAmericaElementPtr(),
128 | }
129 | }
130 |
131 | func sampleFormatValidPtr() map[string]formats.Format {
132 | return map[string]formats.Format{
133 | // Blank
134 | "": {
135 | ID: "",
136 | Loc: "",
137 | BasePath: "",
138 | }, formats.FormatOsmPbf: {
139 | ID: formats.FormatOsmPbf,
140 | Loc: ".osm.pbf",
141 | // BasePath: "/",
142 | }, formats.FormatState: {
143 | ID: formats.FormatState,
144 | Loc: "-updates/state.txt",
145 | BasePath: "../state/",
146 | }, formats.FormatPoly: {
147 | ID: formats.FormatPoly,
148 | Loc: ".poly",
149 | BaseURL: "http://my.new.url/folder",
150 | }, formats.FormatOsmBz2: {
151 | ID: formats.FormatOsmBz2,
152 | Loc: ".osm.bz2",
153 | BasePath: "../osmbz2/",
154 | BaseURL: "http://my.new.url/folder",
155 | }, formats.FormatOsmGz: {
156 | ID: formats.FormatOsmGz,
157 | Loc: ".osm.gz",
158 | BasePath: "../osmgz/",
159 | BaseURL: "http://my.new.url/folder",
160 | },
161 | }
162 | }
163 |
164 | func SampleConfigValidPtr() config.Config {
165 | return config.Config{
166 | BaseURL: "https://my.base.url",
167 | Formats: sampleFormatValidPtr(),
168 | Elements: sampleElementValidPtr(),
169 | }
170 | }
171 |
172 | func TestSlice_Generate(t *testing.T) {
173 | t.Parallel()
174 |
175 | tests := []struct {
176 | name string
177 | e element.MapElement
178 | want []byte
179 | wantErr bool
180 | }{
181 | // TODO: Add test cases.
182 | {
183 | name: "Marshaling OK, no error",
184 | e: sampleElementValidPtr(),
185 | want: []byte{},
186 | wantErr: false,
187 | },
188 | }
189 | for _, thisTest := range tests {
190 | myConfig := SampleConfigValidPtr()
191 | myConfig.Elements = map[string]element.Element{} // void Elements
192 | myConfig.Elements = thisTest.e
193 | thisTest.want, _ = yaml.Marshal(myConfig)
194 | t.Run(thisTest.name, func(t *testing.T) {
195 | t.Parallel()
196 |
197 | got, err := myConfig.Generate()
198 | if err != nil != thisTest.wantErr {
199 | t.Errorf("Slice.Generate() error = %v, wantErr %v", err, thisTest.wantErr)
200 |
201 | return
202 | }
203 |
204 | if !reflect.DeepEqual(got, thisTest.want) {
205 | t.Errorf("Slice.Generate() = %v, want %v", got, thisTest.want)
206 | }
207 | })
208 | }
209 | }
210 |
211 | func TestGenerate(t *testing.T) {
212 | t.Parallel()
213 |
214 | type args struct {
215 | service string
216 | configfile string
217 | progress bool
218 | }
219 |
220 | tests := []struct {
221 | name string
222 | args args
223 | }{
224 | // TODO: Add test cases.
225 | {
226 | name: "run",
227 | args: args{
228 | service: ServiceGeofabrik,
229 | progress: false,
230 | configfile: "/tmp/gen_test.yml",
231 | },
232 | },
233 | }
234 |
235 | for _, tt := range tests {
236 | t.Run(tt.name, func(t *testing.T) {
237 | t.Parallel()
238 |
239 | if err := Generate(tt.args.service, tt.args.progress, tt.args.configfile); err != nil {
240 | t.Errorf("Generate() error = %v", err)
241 | }
242 | })
243 | }
244 | }
245 |
246 | func Test_write(t *testing.T) {
247 | t.Parallel()
248 |
249 | tests := []struct {
250 | name string
251 | input string
252 | output string
253 | }{
254 | // TODO: Add test cases.
255 | {name: "geofabrik", input: "../../geofabrik.yml", output: "/tmp/test.yml"},
256 | }
257 | for _, thisTest := range tests {
258 | t.Run(thisTest.name, func(t *testing.T) {
259 | t.Parallel()
260 |
261 | c, _ := config.LoadConfig(thisTest.input)
262 |
263 | err := Write(c, thisTest.output)
264 | if err != nil {
265 | t.Errorf("write() error = %v", err)
266 | }
267 |
268 | input, err := os.ReadFile(thisTest.input)
269 | if err != nil {
270 | t.Errorf("read() error = %v", err)
271 | }
272 |
273 | output, err := os.ReadFile(thisTest.output)
274 | if err != nil {
275 | t.Errorf("read() error = %v", err)
276 | }
277 |
278 | 			if !reflect.DeepEqual(input, output) { t.Logf("Write() output differs from %s", thisTest.input) }
279 | })
280 | }
281 | }
282 |
283 | func TestCleanup(t *testing.T) {
284 | t.Parallel()
285 |
286 | type args struct {
287 | c *config.Config
288 | }
289 |
290 | tests := []struct {
291 | name string
292 | args args
293 | want element.Formats
294 | }{
295 | // TODO: Add test cases.
296 | {
297 | name: "example 1",
298 | args: args{
299 | c: &config.Config{
300 | BaseURL: "https://my.base.url",
301 | Formats: formats.FormatDefinitions{
302 | formats.FormatOsmPbf: {
303 | ID: formats.FormatOsmPbf,
304 | Loc: ".osm.pbf",
305 | // BasePath: "/",
306 | }, formats.FormatState: {
307 | ID: formats.FormatState,
308 | Loc: "-updates/state.txt",
309 | BasePath: "../state/",
310 | }, formats.FormatPoly: {
311 | ID: formats.FormatPoly,
312 | Loc: ".poly",
313 | BaseURL: "http://my.new.url/folder",
314 | }, formats.FormatOsmBz2: {
315 | ID: formats.FormatOsmBz2,
316 | Loc: ".osm.bz2",
317 | BasePath: "../osmbz2/",
318 | BaseURL: "http://my.new.url/folder",
319 | }, formats.FormatOsmGz: {
320 | ID: formats.FormatOsmGz,
321 | Loc: ".osm.gz",
322 | BasePath: "../osmgz/",
323 | BaseURL: "http://my.new.url/folder",
324 | },
325 | },
326 | Elements: element.MapElement{
327 | "africa": {
328 | ID: "africa",
329 | Name: "Africa",
330 | Formats: []string{
331 | formats.FormatOsmPbf,
332 | "osm.pbf.md5",
333 | formats.FormatOsmBz2,
334 | "osm.bz2.md5",
335 | formats.FormatOshPbf,
336 | "osh.pbf.md5",
337 | formats.FormatPoly,
338 | formats.FormatKml,
339 | formats.FormatState,
340 | },
341 | },
342 | },
343 | },
344 | },
345 | want: element.Formats{
346 | formats.FormatKml,
347 | formats.FormatOshPbf,
348 | "osh.pbf.md5",
349 | formats.FormatOsmBz2,
350 | "osm.bz2.md5",
351 | formats.FormatOsmPbf,
352 | "osm.pbf.md5",
353 | formats.FormatPoly,
354 | formats.FormatState,
355 | },
356 | },
357 | {
358 | name: "example 2",
359 | args: args{
360 | c: &config.Config{
361 | BaseURL: "https://my.base.url",
362 | Formats: formats.FormatDefinitions{},
363 | Elements: element.MapElement{
364 | "africa": {
365 | ID: "africa",
366 | Name: "Africa",
367 | Formats: []string{
368 | formats.FormatOsmPbf,
369 | formats.FormatGeoJSON,
370 | formats.FormatPoly,
371 | formats.FormatState,
372 | },
373 | },
374 | },
375 | },
376 | },
377 | want: element.Formats{
378 | formats.FormatGeoJSON,
379 | formats.FormatOsmPbf,
380 | formats.FormatPoly,
381 | formats.FormatState,
382 | },
383 | },
384 | }
385 |
386 | for _, tt := range tests {
387 | myTest := tt
388 |
389 | t.Run(myTest.name, func(t *testing.T) {
390 | t.Parallel()
391 |
392 | af := myTest.args.c.Elements["africa"]
393 | Cleanup(myTest.args.c)
394 | 			// after Cleanup, af.Formats must equal myTest.want
395 | if !reflect.DeepEqual(af.Formats, myTest.want) {
396 | t.Errorf("Cleanup() = %v, want %v", af.Formats, myTest.want)
397 | }
398 | })
399 | }
400 | }
401 |
402 | func TestPerformGenerate_WithMock(t *testing.T) {
403 | // Mock server
404 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
405 | fmt.Fprintln(w, `{
406 | "features": [
407 | {
408 | "properties": {
409 | "id": "test-region",
410 | "name": "Test Region",
411 | "urls": {
412 | "pbf": "https://example.com/test-region.osm.pbf"
413 | }
414 | }
415 | }
416 | ]
417 | }`)
418 | }))
419 | defer ts.Close()
420 |
421 | // Override URL
422 | oldURL := geofabrik.GeofabrikIndexURL
423 | geofabrik.GeofabrikIndexURL = ts.URL
424 |
425 | defer func() { geofabrik.GeofabrikIndexURL = oldURL }()
426 |
427 | // Temp config file
428 | tmpDir := t.TempDir()
429 | configFile := filepath.Join(tmpDir, "test_config.yml")
430 |
431 | // Run Generate
432 | err := PerformGenerate(ServiceGeofabrik, false, configFile)
433 | require.NoError(t, err)
434 |
435 | // Verify file exists
436 | _, err = os.Stat(configFile)
437 | require.NoError(t, err)
438 |
439 | // Verify content (basic check)
440 | content, err := os.ReadFile(configFile)
441 | require.NoError(t, err)
442 | assert.Contains(t, string(content), "test-region")
443 | assert.Contains(t, string(content), "osm.pbf")
444 | }
445 |
446 | func TestHandleProgress(_ *testing.T) {
447 | // Mock scrapper with mock server
448 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
449 | fmt.Fprintln(w, "OK")
450 | }))
451 | defer ts.Close()
452 |
453 | ms := &MockScrapper{
454 | StartURL: ts.URL,
455 | PB: 10,
456 | }
457 |
458 | 	// No output assertions needed; just exercise handleProgress and make sure it does not panic.
459 | handleProgress(ms)
460 | }
461 |
462 | func TestVisitAndWait(_ *testing.T) {
463 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
464 | fmt.Fprintln(w, "OK")
465 | }))
466 | defer ts.Close()
467 |
468 | c := colly.NewCollector()
469 | visitAndWait(c, ts.URL)
470 | }
471 |
472 | func TestPerformGenerate_Unknown(t *testing.T) {
473 | err := PerformGenerate("unknown", false, "")
474 | require.Error(t, err)
475 | assert.ErrorIs(t, err, ErrUnknownService)
476 | }
477 |
478 | func TestPerformGenerate_Geofabrik_Error(t *testing.T) {
479 | // Mock server returns 500
480 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
481 | w.WriteHeader(http.StatusInternalServerError)
482 | }))
483 | defer ts.Close()
484 |
485 | oldURL := geofabrik.GeofabrikIndexURL
486 | geofabrik.GeofabrikIndexURL = ts.URL
487 |
488 | defer func() { geofabrik.GeofabrikIndexURL = oldURL }()
489 |
490 | err := PerformGenerate(ServiceGeofabrik, false, "dummy")
491 | require.Error(t, err)
492 | // Check error message contains "failed to get index"
493 | assert.Contains(t, err.Error(), "failed to get index")
494 | }
495 |
496 | func TestPerformGenerate_Geofabrik_InvalidJSON(t *testing.T) {
497 | // Mock server returns invalid JSON
498 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
499 | fmt.Fprintln(w, `invalid json`)
500 | }))
501 | defer ts.Close()
502 |
503 | oldURL := geofabrik.GeofabrikIndexURL
504 | geofabrik.GeofabrikIndexURL = ts.URL
505 |
506 | defer func() { geofabrik.GeofabrikIndexURL = oldURL }()
507 |
508 | err := PerformGenerate(ServiceGeofabrik, false, "dummy")
509 | require.Error(t, err)
510 | 	assert.Contains(t, err.Error(), "failed to get index")
511 | 	// geofabrik.GetIndex fails while unmarshalling the invalid body
512 | 	// (fmt.Errorf(ErrUnmarshallingBody, err)) and PerformGenerate wraps that
513 | 	// error as "failed to get index: %w", which is what is asserted above.
514 | }
515 |
516 | func TestPerformGenerate_Geofabrik_WriteError(t *testing.T) {
517 | // Mock server
518 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
519 | fmt.Fprintln(w, `{
520 | "features": []
521 | }`)
522 | }))
523 | defer ts.Close()
524 |
525 | oldURL := geofabrik.GeofabrikIndexURL
526 | geofabrik.GeofabrikIndexURL = ts.URL
527 |
528 | defer func() { geofabrik.GeofabrikIndexURL = oldURL }()
529 |
530 | // Invalid config file path (directory)
531 | tmpDir := t.TempDir()
532 |
533 | err := PerformGenerate(ServiceGeofabrik, false, tmpDir)
534 | require.Error(t, err)
535 | 	// Write fails because the target path is a directory:
536 | 	// os.WriteFile on a directory returns an "is a directory" error on Linux.
537 | 	// (A non-existent parent directory would fail in the same way.)
538 | }
539 |
540 | func TestVisitAndWait_Error(_ *testing.T) {
541 | c := colly.NewCollector()
542 | // Invalid URL causes Visit to return error immediately
543 | // e.g. schemes must be present
544 | visitAndWait(c, ":/invalid-url")
545 | }
546 |
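
The save/override/restore pattern around geofabrik.GeofabrikIndexURL is repeated in every mock-server test above. A small helper could factor it out; the sketch below is hypothetical (the name withMockIndex is invented), relying only on the exported GeofabrikIndexURL variable these tests already use.

```go
package generator

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/julien-noblet/download-geofabrik/internal/generator/importer/geofabrik"
)

// withMockIndex serves a canned index body from a local test server and points
// geofabrik.GeofabrikIndexURL at it for the duration of one test, restoring the
// previous URL when the test finishes.
func withMockIndex(t *testing.T, body string) {
	t.Helper()

	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprintln(w, body)
	}))
	t.Cleanup(ts.Close)

	oldURL := geofabrik.GeofabrikIndexURL
	geofabrik.GeofabrikIndexURL = ts.URL
	t.Cleanup(func() { geofabrik.GeofabrikIndexURL = oldURL })
}
```

With such a helper, a test like TestPerformGenerate_WithMock would reduce to one withMockIndex(t, indexJSON) call followed by the PerformGenerate assertions.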
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Mozilla Public License, version 2.0
2 |
3 | 1. Definitions
4 |
5 | 1.1. "Contributor"
6 |
7 | means each individual or legal entity that creates, contributes to the
8 | creation of, or owns Covered Software.
9 |
10 | 1.2. "Contributor Version"
11 |
12 | means the combination of the Contributions of others (if any) used by a
13 | Contributor and that particular Contributor's Contribution.
14 |
15 | 1.3. "Contribution"
16 |
17 | means Covered Software of a particular Contributor.
18 |
19 | 1.4. "Covered Software"
20 |
21 | means Source Code Form to which the initial Contributor has attached the
22 | notice in Exhibit A, the Executable Form of such Source Code Form, and
23 | Modifications of such Source Code Form, in each case including portions
24 | thereof.
25 |
26 | 1.5. "Incompatible With Secondary Licenses"
27 | means
28 |
29 | a. that the initial Contributor has attached the notice described in
30 | Exhibit B to the Covered Software; or
31 |
32 | b. that the Covered Software was made available under the terms of
33 | version 1.1 or earlier of the License, but not also under the terms of
34 | a Secondary License.
35 |
36 | 1.6. "Executable Form"
37 |
38 | means any form of the work other than Source Code Form.
39 |
40 | 1.7. "Larger Work"
41 |
42 | means a work that combines Covered Software with other material, in a
43 | separate file or files, that is not Covered Software.
44 |
45 | 1.8. "License"
46 |
47 | means this document.
48 |
49 | 1.9. "Licensable"
50 |
51 | means having the right to grant, to the maximum extent possible, whether
52 | at the time of the initial grant or subsequently, any and all of the
53 | rights conveyed by this License.
54 |
55 | 1.10. "Modifications"
56 |
57 | means any of the following:
58 |
59 | a. any file in Source Code Form that results from an addition to,
60 | deletion from, or modification of the contents of Covered Software; or
61 |
62 | b. any new file in Source Code Form that contains any Covered Software.
63 |
64 | 1.11. "Patent Claims" of a Contributor
65 |
66 | means any patent claim(s), including without limitation, method,
67 | process, and apparatus claims, in any patent Licensable by such
68 | Contributor that would be infringed, but for the grant of the License,
69 | by the making, using, selling, offering for sale, having made, import,
70 | or transfer of either its Contributions or its Contributor Version.
71 |
72 | 1.12. "Secondary License"
73 |
74 | means either the GNU General Public License, Version 2.0, the GNU Lesser
75 | General Public License, Version 2.1, the GNU Affero General Public
76 | License, Version 3.0, or any later versions of those licenses.
77 |
78 | 1.13. "Source Code Form"
79 |
80 | means the form of the work preferred for making modifications.
81 |
82 | 1.14. "You" (or "Your")
83 |
84 | means an individual or a legal entity exercising rights under this
85 | License. For legal entities, "You" includes any entity that controls, is
86 | controlled by, or is under common control with You. For purposes of this
87 | definition, "control" means (a) the power, direct or indirect, to cause
88 | the direction or management of such entity, whether by contract or
89 | otherwise, or (b) ownership of more than fifty percent (50%) of the
90 | outstanding shares or beneficial ownership of such entity.
91 |
92 |
93 | 2. License Grants and Conditions
94 |
95 | 2.1. Grants
96 |
97 | Each Contributor hereby grants You a world-wide, royalty-free,
98 | non-exclusive license:
99 |
100 | a. under intellectual property rights (other than patent or trademark)
101 | Licensable by such Contributor to use, reproduce, make available,
102 | modify, display, perform, distribute, and otherwise exploit its
103 | Contributions, either on an unmodified basis, with Modifications, or
104 | as part of a Larger Work; and
105 |
106 | b. under Patent Claims of such Contributor to make, use, sell, offer for
107 | sale, have made, import, and otherwise transfer either its
108 | Contributions or its Contributor Version.
109 |
110 | 2.2. Effective Date
111 |
112 | The licenses granted in Section 2.1 with respect to any Contribution
113 | become effective for each Contribution on the date the Contributor first
114 | distributes such Contribution.
115 |
116 | 2.3. Limitations on Grant Scope
117 |
118 | The licenses granted in this Section 2 are the only rights granted under
119 | this License. No additional rights or licenses will be implied from the
120 | distribution or licensing of Covered Software under this License.
121 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
122 | Contributor:
123 |
124 | a. for any code that a Contributor has removed from Covered Software; or
125 |
126 | b. for infringements caused by: (i) Your and any other third party's
127 | modifications of Covered Software, or (ii) the combination of its
128 | Contributions with other software (except as part of its Contributor
129 | Version); or
130 |
131 | c. under Patent Claims infringed by Covered Software in the absence of
132 | its Contributions.
133 |
134 | This License does not grant any rights in the trademarks, service marks,
135 | or logos of any Contributor (except as may be necessary to comply with
136 | the notice requirements in Section 3.4).
137 |
138 | 2.4. Subsequent Licenses
139 |
140 | No Contributor makes additional grants as a result of Your choice to
141 | distribute the Covered Software under a subsequent version of this
142 | License (see Section 10.2) or under the terms of a Secondary License (if
143 | permitted under the terms of Section 3.3).
144 |
145 | 2.5. Representation
146 |
147 | Each Contributor represents that the Contributor believes its
148 | Contributions are its original creation(s) or it has sufficient rights to
149 | grant the rights to its Contributions conveyed by this License.
150 |
151 | 2.6. Fair Use
152 |
153 | This License is not intended to limit any rights You have under
154 | applicable copyright doctrines of fair use, fair dealing, or other
155 | equivalents.
156 |
157 | 2.7. Conditions
158 |
159 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
160 | Section 2.1.
161 |
162 |
163 | 3. Responsibilities
164 |
165 | 3.1. Distribution of Source Form
166 |
167 | All distribution of Covered Software in Source Code Form, including any
168 | Modifications that You create or to which You contribute, must be under
169 | the terms of this License. You must inform recipients that the Source
170 | Code Form of the Covered Software is governed by the terms of this
171 | License, and how they can obtain a copy of this License. You may not
172 | attempt to alter or restrict the recipients' rights in the Source Code
173 | Form.
174 |
175 | 3.2. Distribution of Executable Form
176 |
177 | If You distribute Covered Software in Executable Form then:
178 |
179 | a. such Covered Software must also be made available in Source Code Form,
180 | as described in Section 3.1, and You must inform recipients of the
181 | Executable Form how they can obtain a copy of such Source Code Form by
182 | reasonable means in a timely manner, at a charge no more than the cost
183 | of distribution to the recipient; and
184 |
185 | b. You may distribute such Executable Form under the terms of this
186 | License, or sublicense it under different terms, provided that the
187 | license for the Executable Form does not attempt to limit or alter the
188 | recipients' rights in the Source Code Form under this License.
189 |
190 | 3.3. Distribution of a Larger Work
191 |
192 | You may create and distribute a Larger Work under terms of Your choice,
193 | provided that You also comply with the requirements of this License for
194 | the Covered Software. If the Larger Work is a combination of Covered
195 | Software with a work governed by one or more Secondary Licenses, and the
196 | Covered Software is not Incompatible With Secondary Licenses, this
197 | License permits You to additionally distribute such Covered Software
198 | under the terms of such Secondary License(s), so that the recipient of
199 | the Larger Work may, at their option, further distribute the Covered
200 | Software under the terms of either this License or such Secondary
201 | License(s).
202 |
203 | 3.4. Notices
204 |
205 | You may not remove or alter the substance of any license notices
206 | (including copyright notices, patent notices, disclaimers of warranty, or
207 | limitations of liability) contained within the Source Code Form of the
208 | Covered Software, except that You may alter any license notices to the
209 | extent required to remedy known factual inaccuracies.
210 |
211 | 3.5. Application of Additional Terms
212 |
213 | You may choose to offer, and to charge a fee for, warranty, support,
214 | indemnity or liability obligations to one or more recipients of Covered
215 | Software. However, You may do so only on Your own behalf, and not on
216 | behalf of any Contributor. You must make it absolutely clear that any
217 | such warranty, support, indemnity, or liability obligation is offered by
218 | You alone, and You hereby agree to indemnify every Contributor for any
219 | liability incurred by such Contributor as a result of warranty, support,
220 | indemnity or liability terms You offer. You may include additional
221 | disclaimers of warranty and limitations of liability specific to any
222 | jurisdiction.
223 |
224 | 4. Inability to Comply Due to Statute or Regulation
225 |
226 | If it is impossible for You to comply with any of the terms of this License
227 | with respect to some or all of the Covered Software due to statute,
228 | judicial order, or regulation then You must: (a) comply with the terms of
229 | this License to the maximum extent possible; and (b) describe the
230 | limitations and the code they affect. Such description must be placed in a
231 | text file included with all distributions of the Covered Software under
232 | this License. Except to the extent prohibited by statute or regulation,
233 | such description must be sufficiently detailed for a recipient of ordinary
234 | skill to be able to understand it.
235 |
236 | 5. Termination
237 |
238 | 5.1. The rights granted under this License will terminate automatically if You
239 | fail to comply with any of its terms. However, if You become compliant,
240 | then the rights granted under this License from a particular Contributor
241 | are reinstated (a) provisionally, unless and until such Contributor
242 | explicitly and finally terminates Your grants, and (b) on an ongoing
243 | basis, if such Contributor fails to notify You of the non-compliance by
244 | some reasonable means prior to 60 days after You have come back into
245 | compliance. Moreover, Your grants from a particular Contributor are
246 | reinstated on an ongoing basis if such Contributor notifies You of the
247 | non-compliance by some reasonable means, this is the first time You have
248 | received notice of non-compliance with this License from such
249 | Contributor, and You become compliant prior to 30 days after Your receipt
250 | of the notice.
251 |
252 | 5.2. If You initiate litigation against any entity by asserting a patent
253 | infringement claim (excluding declaratory judgment actions,
254 | counter-claims, and cross-claims) alleging that a Contributor Version
255 | directly or indirectly infringes any patent, then the rights granted to
256 | You by any and all Contributors for the Covered Software under Section
257 | 2.1 of this License shall terminate.
258 |
259 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
260 | license agreements (excluding distributors and resellers) which have been
261 | validly granted by You or Your distributors under this License prior to
262 | termination shall survive termination.
263 |
264 | 6. Disclaimer of Warranty
265 |
266 | Covered Software is provided under this License on an "as is" basis,
267 | without warranty of any kind, either expressed, implied, or statutory,
268 | including, without limitation, warranties that the Covered Software is free
269 | of defects, merchantable, fit for a particular purpose or non-infringing.
270 | The entire risk as to the quality and performance of the Covered Software
271 | is with You. Should any Covered Software prove defective in any respect,
272 | You (not any Contributor) assume the cost of any necessary servicing,
273 | repair, or correction. This disclaimer of warranty constitutes an essential
274 | part of this License. No use of any Covered Software is authorized under
275 | this License except under this disclaimer.
276 |
277 | 7. Limitation of Liability
278 |
279 | Under no circumstances and under no legal theory, whether tort (including
280 | negligence), contract, or otherwise, shall any Contributor, or anyone who
281 | distributes Covered Software as permitted above, be liable to You for any
282 | direct, indirect, special, incidental, or consequential damages of any
283 | character including, without limitation, damages for lost profits, loss of
284 | goodwill, work stoppage, computer failure or malfunction, or any and all
285 | other commercial damages or losses, even if such party shall have been
286 | informed of the possibility of such damages. This limitation of liability
287 | shall not apply to liability for death or personal injury resulting from
288 | such party's negligence to the extent applicable law prohibits such
289 | limitation. Some jurisdictions do not allow the exclusion or limitation of
290 | incidental or consequential damages, so this exclusion and limitation may
291 | not apply to You.
292 |
293 | 8. Litigation
294 |
295 | Any litigation relating to this License may be brought only in the courts
296 | of a jurisdiction where the defendant maintains its principal place of
297 | business and such litigation shall be governed by laws of that
298 | jurisdiction, without reference to its conflict-of-law provisions. Nothing
299 | in this Section shall prevent a party's ability to bring cross-claims or
300 | counter-claims.
301 |
302 | 9. Miscellaneous
303 |
304 | This License represents the complete agreement concerning the subject
305 | matter hereof. If any provision of this License is held to be
306 | unenforceable, such provision shall be reformed only to the extent
307 | necessary to make it enforceable. Any law or regulation which provides that
308 | the language of a contract shall be construed against the drafter shall not
309 | be used to construe this License against a Contributor.
310 |
311 |
312 | 10. Versions of the License
313 |
314 | 10.1. New Versions
315 |
316 | Mozilla Foundation is the license steward. Except as provided in Section
317 | 10.3, no one other than the license steward has the right to modify or
318 | publish new versions of this License. Each version will be given a
319 | distinguishing version number.
320 |
321 | 10.2. Effect of New Versions
322 |
323 | You may distribute the Covered Software under the terms of the version
324 | of the License under which You originally received the Covered Software,
325 | or under the terms of any subsequent version published by the license
326 | steward.
327 |
328 | 10.3. Modified Versions
329 |
330 | If you create software not governed by this License, and you want to
331 | create a new license for such software, you may create and use a
332 | modified version of this License if you rename the license and remove
333 | any references to the name of the license steward (except to note that
334 | such modified license differs from this License).
335 |
336 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
337 | Licenses If You choose to distribute Source Code Form that is
338 | Incompatible With Secondary Licenses under the terms of this version of
339 | the License, the notice described in Exhibit B of this License must be
340 | attached.
341 |
342 | Exhibit A - Source Code Form License Notice
343 |
344 | This Source Code Form is subject to the
345 | terms of the Mozilla Public License, v.
346 | 2.0. If a copy of the MPL was not
347 | distributed with this file, You can
348 | obtain one at
349 | http://mozilla.org/MPL/2.0/.
350 |
351 | If it is not possible or desirable to put the notice in a particular file,
352 | then You may include the notice in a location (such as a LICENSE file in a
353 | relevant directory) where a recipient would be likely to look for such a
354 | notice.
355 |
356 | You may add additional accurate notices of copyright ownership.
357 |
358 | Exhibit B - "Incompatible With Secondary Licenses" Notice
359 |
360 | This Source Code Form is "Incompatible
361 | With Secondary Licenses", as defined by
362 | the Mozilla Public License, v. 2.0.
363 |
364 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw=
2 | github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ=
3 | github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
4 | github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
5 | github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
6 | github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
7 | github.com/antchfx/htmlquery v1.3.5 h1:aYthDDClnG2a2xePf6tys/UyyM/kRcsFRm+ifhFKoU0=
8 | github.com/antchfx/htmlquery v1.3.5/go.mod h1:5oyIPIa3ovYGtLqMPNjBF2Uf25NPCKsMjCnQ8lvjaoA=
9 | github.com/antchfx/xmlquery v1.5.0 h1:uAi+mO40ZWfyU6mlUBxRVvL6uBNZ6LMU4M3+mQIBV4c=
10 | github.com/antchfx/xmlquery v1.5.0/go.mod h1:lJfWRXzYMK1ss32zm1GQV3gMIW/HFey3xDZmkP1SuNc=
11 | github.com/antchfx/xpath v1.3.5 h1:PqbXLC3TkfeZyakF5eeh3NTWEbYl4VHNVeufANzDbKQ=
12 | github.com/antchfx/xpath v1.3.5/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
13 | github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
14 | github.com/bits-and-blooms/bitset v1.24.4 h1:95H15Og1clikBrKr/DuzMXkQzECs1M6hhoGXLwLQOZE=
15 | github.com/bits-and-blooms/bitset v1.24.4/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
16 | github.com/cheggaaa/pb/v3 v3.1.7 h1:2FsIW307kt7A/rz/ZI2lvPO+v3wKazzE4K/0LtTWsOI=
17 | github.com/cheggaaa/pb/v3 v3.1.7/go.mod h1:/Ji89zfVPeC/u5j8ukD0MBPHt2bzTYp74lQ7KlgFWTQ=
18 | github.com/clipperhouse/displaywidth v0.6.1 h1:/zMlAezfDzT2xy6acHBzwIfyu2ic0hgkT83UX5EY2gY=
19 | github.com/clipperhouse/displaywidth v0.6.1/go.mod h1:R+kHuzaYWFkTm7xoMmK1lFydbci4X2CicfbGstSGg0o=
20 | github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
21 | github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
22 | github.com/clipperhouse/uax29/v2 v2.3.0 h1:SNdx9DVUqMoBuBoW3iLOj4FQv3dN5mDtuqwuhIGpJy4=
23 | github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
24 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
25 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
26 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
27 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
28 | github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
29 | github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
30 | github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
31 | github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
32 | github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
33 | github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
34 | github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
35 | github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
36 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
37 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
38 | github.com/gocolly/colly/v2 v2.3.0 h1:HSFh0ckbgVd2CSGRE+Y/iA4goUhGROJwyQDCMXGFBWM=
39 | github.com/gocolly/colly/v2 v2.3.0/go.mod h1:Qp54s/kQbwCQvFVx8KzKCSTXVJ1wWT4QeAKEu33x1q8=
40 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
41 | github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ=
42 | github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=
43 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
44 | github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
45 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
46 | github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
47 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
48 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
49 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
50 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
51 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
52 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
53 | github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
54 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
55 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
56 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
57 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
58 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
59 | github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
60 | github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
61 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
62 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
63 | github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
64 | github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
65 | github.com/nlnwa/whatwg-url v0.6.2 h1:jU61lU2ig4LANydbEJmA2nPrtCGiKdtgT0rmMd2VZ/Q=
66 | github.com/nlnwa/whatwg-url v0.6.2/go.mod h1:x0FPXJzzOEieQtsBT/AKvbiBbQ46YlL6Xa7m02M1ECk=
67 | github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc=
68 | github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6/go.mod h1:rEKTHC9roVVicUIfZK7DYrdIoM0EOr8mK1Hj5s3JjH0=
69 | github.com/olekukonko/errors v1.1.0 h1:RNuGIh15QdDenh+hNvKrJkmxxjV4hcS50Db478Ou5sM=
70 | github.com/olekukonko/errors v1.1.0/go.mod h1:ppzxA5jBKcO1vIpCXQ9ZqgDh8iwODz6OXIGKU8r5m4Y=
71 | github.com/olekukonko/ll v0.1.3 h1:sV2jrhQGq5B3W0nENUISCR6azIPf7UBUpVq0x/y70Fg=
72 | github.com/olekukonko/ll v0.1.3/go.mod h1:b52bVQRRPObe+yyBl0TxNfhesL0nedD4Cht0/zx55Ew=
73 | github.com/olekukonko/tablewriter v1.1.2 h1:L2kI1Y5tZBct/O/TyZK1zIE9GlBj/TVs+AY5tZDCDSc=
74 | github.com/olekukonko/tablewriter v1.1.2/go.mod h1:z7SYPugVqGVavWoA2sGsFIoOVNmEHxUAAMrhXONtfkg=
75 | github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
76 | github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
77 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
78 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
79 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
80 | github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
81 | github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
82 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
83 | github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4=
84 | github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI=
85 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
86 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
87 | github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I=
88 | github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg=
89 | github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
90 | github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
91 | github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
92 | github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
93 | github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
94 | github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
95 | github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
96 | github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU=
97 | github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY=
98 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
99 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
100 | github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
101 | github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
102 | github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
103 | github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
104 | github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
105 | github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
106 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
107 | go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
108 | go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
109 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
110 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
111 | golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
112 | golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
113 | golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
114 | golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
115 | golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
116 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
117 | golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
118 | golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
119 | golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
120 | golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
121 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
122 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
123 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
124 | golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
125 | golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
126 | golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
127 | golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
128 | golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
129 | golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
130 | golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
131 | golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
132 | golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
133 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
134 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
135 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
136 | golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
137 | golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
138 | golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
139 | golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
140 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
141 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
142 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
143 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
144 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
145 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
146 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
147 | golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
148 | golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
149 | golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
150 | golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
151 | golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
152 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
153 | golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
154 | golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
155 | golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
156 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
157 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
158 | golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
159 | golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
160 | golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
161 | golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
162 | golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
163 | golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
164 | golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
165 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
166 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
167 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
168 | golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
169 | golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
170 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
171 | golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
172 | golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
173 | golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
174 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
175 | golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
176 | golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
177 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
178 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
179 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
180 | golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
181 | golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
182 | golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
183 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
184 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
185 | google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
186 | google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
187 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
188 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
189 | google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
190 | google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
191 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
192 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
193 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
194 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
195 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
196 |
--------------------------------------------------------------------------------