├── .air.toml
├── .dockerignore
├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── dependabot.yml
└── workflows
│ ├── ci_codeql.yml
│ ├── ci_main.yml
│ ├── ci_test.yml
│ └── release.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── docker-compose.yaml
├── docker
└── Dockerfile
├── docs
├── example_category.json
└── hearchco.service
├── generate
├── enginer
│ ├── enginer.go
│ ├── structs.go
│ └── util.go
└── exchanger
│ ├── exchanger.go
│ ├── structs.go
│ └── util.go
├── go.mod
├── go.sum
├── goreleaser
├── develop.yml
└── release.yml
├── hearchco_example.yaml
├── scripts
├── test-dynamodb-docker.sh
├── test-dynamodb-podman.sh
├── test-dynamodb.sh
├── test-engines.sh
├── test-redis-docker.sh
├── test-redis-podman.sh
├── test-redis.sh
└── test.sh
└── src
├── cache
├── actions_currencies.go
├── db.go
├── driver.go
├── dynamodb
│ ├── dynamodb.go
│ └── dynamodb_test.go
├── nocache
│ ├── nocache.go
│ └── nocache_test.go
└── redis
│ ├── redis.go
│ └── redis_test.go
├── cli
├── flags.go
├── setup.go
└── version.go
├── config
├── defaults.go
├── load.go
├── structs_config.go
├── structs_engines.go
├── structs_exchange.go
└── structs_server.go
├── exchange
├── currency
│ ├── currency.go
│ └── map.go
├── engines
│ ├── currencyapi
│ │ ├── exchange.go
│ │ ├── info.go
│ │ ├── json.go
│ │ ├── new.go
│ │ └── note.md
│ ├── exchanger.go
│ ├── exchangerateapi
│ │ ├── exchange.go
│ │ ├── info.go
│ │ ├── json.go
│ │ └── new.go
│ ├── frankfurter
│ │ ├── exchange.go
│ │ ├── info.go
│ │ ├── json.go
│ │ └── new.go
│ └── name.go
└── exchange.go
├── logger
└── setup.go
├── main.go
├── profiler
└── run.go
├── router
├── lambda.go
├── middlewares
│ ├── compress.go
│ ├── logging.go
│ └── setup.go
├── router.go
└── routes
│ ├── params.go
│ ├── responses.go
│ ├── route_currencies.go
│ ├── route_exchange.go
│ ├── route_image_proxy.go
│ ├── route_search_images.go
│ ├── route_search_suggestions.go
│ ├── route_search_web.go
│ ├── setup.go
│ └── writers.go
├── search
├── category
│ ├── convert.go
│ ├── disabled.go
│ ├── json.go
│ └── type.go
├── context_cancel.go
├── engines
│ ├── _engines_test
│ │ ├── s_images.go
│ │ ├── s_suggestions.go
│ │ ├── s_web.go
│ │ └── structs.go
│ ├── bing
│ │ ├── bing.md
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── json.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_images.go
│ │ ├── s_images_test.go
│ │ ├── s_web.go
│ │ ├── s_web_test.go
│ │ └── telemetry.go
│ ├── brave
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── duckduckgo
│ │ ├── ddg.md
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_suggestions.go
│ │ ├── s_suggestions_test.go
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── etools
│ │ ├── dompaths.go
│ │ ├── etools.md
│ │ ├── info.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── google
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── json.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_images.go
│ │ ├── s_images_test.go
│ │ ├── s_suggestions.go
│ │ ├── s_suggestions_test.go
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── googlescholar
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ ├── s_web_test.go
│ │ └── telemetry.go
│ ├── mojeek
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── name.go
│ ├── options
│ │ ├── locale.go
│ │ └── structs.go
│ ├── presearch
│ │ ├── info.go
│ │ ├── json.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── presearch.md
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── qwant
│ │ ├── info.go
│ │ ├── json.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── qwant.md
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── startpage
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ ├── s_web_test.go
│ │ └── startpage.md
│ ├── swisscows
│ │ ├── authenticator.go
│ │ ├── info.go
│ │ ├── json.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ └── s_web_test.go
│ ├── yahoo
│ │ ├── dompaths.go
│ │ ├── info.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ ├── s_web_test.go
│ │ └── telemetry.go
│ └── yep
│ │ ├── info.go
│ │ ├── json.go
│ │ ├── new.go
│ │ ├── params.go
│ │ ├── s_web.go
│ │ └── s_web_test.go
├── groups.go
├── init.go
├── once.go
├── params.go
├── receiver.go
├── result
│ ├── construct.go
│ ├── interfaces.go
│ ├── output.go
│ ├── r_images.go
│ ├── r_images_output.go
│ ├── r_images_scraped.go
│ ├── r_suggestion.go
│ ├── r_suggestion_scraped.go
│ ├── r_web.go
│ ├── r_web_output.go
│ ├── r_web_scraped.go
│ ├── rank.go
│ ├── rank
│ │ ├── filler.go
│ │ ├── filler_test.go
│ │ ├── interfaces.go
│ │ ├── results.go
│ │ ├── score.go
│ │ ├── sort.go
│ │ ├── structs_test.go
│ │ └── suggestions.go
│ ├── rank_scraped.go
│ ├── ranksimple.go
│ ├── ranksimple_scraped.go
│ ├── result_map.go
│ ├── shorten.go
│ ├── shorten_test.go
│ └── suggestions_map.go
├── run_engine.go
├── run_engines.go
├── run_origins.go
├── s_images.go
├── s_suggestions.go
├── s_web.go
├── scraper
│ ├── collector.go
│ ├── dompaths.go
│ ├── enginebase.go
│ ├── interfaces.go
│ ├── pagecontext.go
│ ├── pagerankcounter.go
│ ├── parse
│ │ ├── fields.go
│ │ └── parse.go
│ ├── requests.go
│ ├── scrape.go
│ ├── suggest_resp.go
│ └── timeout.go
├── searchtype
│ └── name.go
└── useragent
│ └── useragent.go
└── utils
├── anonymize
├── hash.go
├── hash_test.go
├── string.go
├── string_test.go
└── structs_test.go
├── gotypelimits
├── ints.go
└── uints.go
├── kvpair
└── kvpair.go
├── morestrings
└── join.go
├── moretime
├── convert.go
└── types.go
└── moreurls
├── build.go
├── fqdn.go
└── params.go
/.air.toml:
--------------------------------------------------------------------------------
1 | root = "."
2 | testdata_dir = "testdata"
3 | tmp_dir = "tmp"
4 |
5 | [build]
6 | args_bin = []
7 | bin = "./tmp/main"
8 | cmd = "CGO_ENABLED=0 go build -ldflags \"-s -w\" -trimpath -o ./tmp/main ./src"
9 | delay = 1000
10 | exclude_dir = ["*"]
11 | exclude_file = []
12 | exclude_regex = ["_test.go"]
13 | exclude_unchanged = false
14 | follow_symlink = false
15 | full_bin = ""
16 | include_dir = ["src"]
17 | include_ext = ["go"]
18 | include_file = []
19 | kill_delay = "0s"
20 | log = "build-errors.log"
21 | poll = false
22 | poll_interval = 0
23 | post_cmd = []
24 | pre_cmd = []
25 | rerun = false
26 | rerun_delay = 500
27 | send_interrupt = true
28 | stop_on_error = false
29 |
30 | [color]
31 | app = ""
32 | build = "yellow"
33 | main = "magenta"
34 | runner = "green"
35 | watcher = "cyan"
36 |
37 | [log]
38 | main_only = false
39 | time = false
40 |
41 | [misc]
42 | clean_on_exit = true
43 |
44 | [screen]
45 | clear_on_rebuild = true
46 | keep_scroll = true
47 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | **
2 | !docker/
3 | !dist/
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [hearchco, aleksasiriski]
2 | ko_fi: aleksasiriski
3 | liberapay: hearchco
4 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG]"
5 | labels: "bug"
6 | assignees: ""
7 | ---
8 |
9 | **Describe the bug**
10 | A clear and concise description of what the bug is.
11 |
12 | **To Reproduce**
13 | Steps to reproduce the behavior:
14 |
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **What's your setup? (please complete the following information):**
27 |
28 | - Using monolith or micro service setup?: [e.g. monolith]
29 | - Version of Hearchco [e.g. 1.2.3]
30 | - Method of installation [e.g. docker]
31 |
32 | **Docker compose file if used**
33 |
34 | ```docker
35 | Your compose goes here
36 | ```
37 |
38 | **Hearchco config file**
39 |
40 | ```yaml
41 | Your config goes here
42 | ```
43 |
44 | **Additional context**
45 | Add any other context about the problem here.
46 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Ask for adding a new feature
4 | title: "[FEAT]"
5 | labels: "enhancement"
6 | assignees: ""
7 | ---
8 |
9 | **Describe your feature request**
10 | A clear and concise description of what the feature request is.
11 |
12 | **Screenshots**
13 | If applicable, add screenshots to help explain your request.
14 |
15 | **Additional context**
16 | Add any other context about the problem here.
17 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
7 | - package-ecosystem: "docker"
8 | directory: "/"
9 | schedule:
10 | interval: "weekly"
11 | - package-ecosystem: "gomod"
12 | directory: "/"
13 | schedule:
14 | interval: "weekly"
15 |
--------------------------------------------------------------------------------
/.github/workflows/ci_codeql.yml:
--------------------------------------------------------------------------------
1 | name: CodeQL CI
2 |
3 | on:
4 | push:
5 | branches: ["main"]
6 | paths:
7 | - ".github/workflows/ci_codeql.yml" # this file
8 | - "go.mod"
9 | - "go.sum"
10 | - "Makefile"
11 | - "generate/**/*"
12 | - "src/**/*"
13 | pull_request:
14 | branches: ["*"]
15 | paths:
16 | - ".github/workflows/ci_codeql.yml" # this file
17 | - "go.mod"
18 | - "go.sum"
19 | - "Makefile"
20 | - "generate/**/*"
21 | - "src/**/*"
22 | schedule:
23 | - cron: "38 14 * * 3"
24 |
25 | jobs:
26 | analyze:
27 | name: Analyze
28 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
29 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
30 | permissions:
31 | security-events: write
32 | actions: read
33 | contents: read
34 |
35 | strategy:
36 | fail-fast: false
37 | matrix:
38 | language: ["go"]
39 |
40 | steps:
41 | - name: Checkout repository
42 | uses: actions/checkout@v4
43 |
44 | - name: Setup Go
45 | uses: actions/setup-go@v5
46 | with:
47 | go-version: stable
48 |
49 | - name: Initialize CodeQL
50 | uses: github/codeql-action/init@v3
51 | with:
52 | languages: ${{ matrix.language }}
53 |
54 | - name: Generate go code from go:generate comments
55 | run: make install
56 |
57 | - name: Build project
58 | run: make compile
59 |
60 | - name: Perform CodeQL Analysis
61 | uses: github/codeql-action/analyze@v3
62 | with:
63 | category: "/language:${{matrix.language}}"
64 |
--------------------------------------------------------------------------------
/.github/workflows/ci_main.yml:
--------------------------------------------------------------------------------
1 | name: Main CI
2 |
3 | on:
4 | push:
5 | branches: ["main"]
6 | paths:
7 | - ".github/workflows/ci_main.yml" # this file
8 | - "go.mod"
9 | - "go.sum"
10 | - "Makefile"
11 | - "goreleaser/develop.yml"
12 | - "generate/**/*"
13 | - "src/**/*"
14 | - "docker/**/*"
15 | - ".dockerignore"
16 |
17 | env:
18 | REGISTRY: ghcr.io
19 | IMAGE_NAME: ${{ github.repository }}
20 |
21 | jobs:
22 | goreleaser:
23 | runs-on: ubuntu-latest
24 | permissions:
25 | contents: read
26 |
27 | steps:
28 | - name: Checkout repository
29 | uses: actions/checkout@v4
30 | with:
31 | fetch-depth: 0
32 |
33 | - name: Setup Go
34 | uses: actions/setup-go@v5
35 | with:
36 | go-version: stable
37 |
38 | - name: Setup QEMU
39 | uses: docker/setup-qemu-action@v3
40 | with:
41 | platforms: arm64,arm
42 |
43 | - name: Generate go code from go:generate comments
44 | run: make install
45 |
46 | - name: Snapshot release
47 | uses: goreleaser/goreleaser-action@v6
48 | with:
49 | version: "~> v2"
50 | args: release --snapshot --clean --config goreleaser/develop.yml
51 |
52 | test:
53 | runs-on: ubuntu-latest
54 | permissions:
55 | contents: read
56 |
57 | steps:
58 | - name: Checkout repository
59 | uses: actions/checkout@v4
60 |
61 | - name: Setup Go
62 | uses: actions/setup-go@v5
63 | with:
64 | go-version: stable
65 |
66 | - name: Generate go code from go:generate comments
67 | run: make install
68 |
69 | - name: Build project
70 | run: make compile
71 |
72 | - name: Test units
73 | run: make test
74 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | push:
5 | tags: ["v*.*.*"]
6 |
7 | env:
8 | REGISTRY: ghcr.io
9 | IMAGE_NAME: ${{ github.repository }}
10 |
11 | jobs:
12 | goreleaser:
13 | runs-on: ubuntu-latest
14 | permissions:
15 | contents: write
16 | packages: write
17 | issues: write
18 |
19 | steps:
20 | - name: Checkout repository
21 | uses: actions/checkout@v4
22 | with:
23 | fetch-depth: 0
24 | fetch-tags: true
25 |
26 | - name: Setup Go
27 | uses: actions/setup-go@v5
28 | with:
29 | go-version: stable
30 |
31 | - name: Setup QEMU
32 | uses: docker/setup-qemu-action@v3
33 | with:
34 | platforms: arm64,arm
35 |
36 | - name: Login to GitHub Container Registry
37 | uses: docker/login-action@v3
38 | with:
39 | registry: ${{ env.REGISTRY }}
40 | username: ${{ github.actor }}
41 | password: ${{ secrets.GITHUB_TOKEN }}
42 |
43 | - name: Generate go code from go:generate comments
44 | run: make install
45 |
46 | - name: Release
47 | uses: goreleaser/goreleaser-action@v6
48 | with:
49 | version: "~> v2"
50 | args: release --clean --config goreleaser/release.yml
51 | env:
52 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
53 |
54 | - name: Artifact Linux
55 | uses: actions/upload-artifact@v4
56 | with:
57 | name: build_linux
58 | path: dist/*linux*
59 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # If you prefer the allow list template instead of the deny list, see community template:
2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
3 | #
4 | # Binaries for programs and plugins
5 | *.exe
6 | *.exe~
7 | *.dll
8 | *.so
9 | *.dylib
10 | *.wasm
11 | bin/
12 |
13 | # Test binary, built with `go test -c`
14 | *.test
15 |
16 | # Output of the go coverage tool, specifically when used with LiteIDE
17 | *.out
18 |
19 | # Dependency directories (remove the comment below to include it)
20 | # vendor/
21 |
22 | # Go workspace file
23 | go.work
24 | go.work.sum
25 |
26 | hearchco.*
27 | .vscode/*
28 | test.go
29 |
30 | src/search/engines/*/site/*
31 | !src/search/engines/_engines_test
32 | !src/search/engines/_sedefaults
33 |
34 | log/
35 | database/
36 | profiling/
37 |
38 | # go generate
39 | *_stringer.go
40 | *_enumer.go
41 | *_enginer.go
42 | *_exchanger.go
43 |
44 | # test dump
45 | testdump*
46 | tmp/
47 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | run:
2 | air -- --pretty
3 |
4 | debug:
5 | air -- --pretty -v
6 |
7 | trace:
8 | air -- --pretty -vv
9 |
10 | install:
11 | go get ./...
12 | go install github.com/dmarkham/enumer@latest
13 | go generate ./...
14 | go install github.com/air-verse/air@latest
15 |
16 | compile:
17 | CGO_ENABLED=0 go build -ldflags "-s -w" -trimpath ./src/...
18 | compile-linux:
19 | CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -trimpath -o bin/hearchco ./src
20 | compile-macos:
21 | CGO_ENABLED=0 GOOS=darwin go build -ldflags "-s -w" -trimpath -o bin/hearchco ./src
22 | compile-windows:
23 | CGO_ENABLED=0 GOOS=windows go build -ldflags "-s -w" -trimpath -o bin/hearchco.exe ./src
24 |
25 | test:
26 | sh ./scripts/test.sh
27 | test-engines:
28 | sh ./scripts/test-engines.sh
29 |
30 | test-redis:
31 | sh ./scripts/test-redis.sh
32 | test-redis-podman:
33 | sh ./scripts/test-redis-podman.sh
34 | test-redis-docker:
35 | sh ./scripts/test-redis-docker.sh
36 |
37 | test-dynamodb:
38 | sh ./scripts/test-dynamodb.sh
39 | test-dynamodb-podman:
40 | sh ./scripts/test-dynamodb-podman.sh
41 | test-dynamodb-docker:
42 | sh ./scripts/test-dynamodb-docker.sh
43 |
44 | test-all: test test-redis test-dynamodb test-engines
45 | test-all-podman: test test-redis-podman test-dynamodb-podman test-engines
46 | test-all-docker: test test-redis-docker test-dynamodb-docker test-engines
47 |
48 | update:
49 | go get -u ./...
50 | go mod tidy
51 |
52 | lint:
53 | golangci-lint run
54 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hearchco agent repository built using Go
2 |
3 | ## Installation
4 |
5 | ### Docker
6 | [https://github.com/hearchco/agent/pkgs/container/agent](https://github.com/hearchco/agent/pkgs/container/agent)
7 |
8 | ```bash
9 | docker pull ghcr.io/hearchco/agent
10 | ```
11 |
12 | ### Binary
13 |
14 | Binary file - Linux
15 |
16 | Download the latest release from the [releases page](https://github.com/hearchco/agent/releases) manually, or automatically like below and set the permissions for the files.
17 |
18 | ```bash
19 | # Replace the 'match' part with your own ARCH
20 | curl -L -o /opt/hearchco "$(curl -sL https://api.github.com/repos/hearchco/agent/releases/latest | jq -r '.assets[] | select(.name? | match("linux_amd64$")) | .browser_download_url')"
21 | ```
22 |
23 | ### Create a user and modify the rights.
24 |
25 | ```bash
26 | sudo useradd --shell /bin/bash --system --user-group hearchco
27 | sudo chown hearchco:hearchco /opt/hearchco
28 | ```
29 |
30 | ## Start/Stop/Status
31 |
32 | ### Create a Systemd Unit
33 |
34 | Save the example systemd unit file ([docs/hearchco.service](docs/hearchco.service)) as `/etc/systemd/system/hearchco.service`.
35 |
36 | ### Start the hearchco Service
37 |
38 | Reload the systemd daemon, start the newly created service, and check its status.
39 |
40 | ```bash
41 | sudo systemctl daemon-reload
42 | sudo systemctl start hearchco
43 | sudo systemctl status hearchco
44 | ```
45 |
46 | ### Debug
47 |
48 | ```bash
49 | sudo journalctl -u hearchco -b --reverse
50 | ```
51 |
52 | ### Start hearchco on Startup
53 |
54 | ```bash
55 | sudo systemctl enable hearchco.service
56 | ```
57 |
58 |
59 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | We love responsible reports of (potential) security issues in Hearchco.
4 |
5 | You can contact us at [security@hearch.co](mailto:security@hearch.co).
6 |
7 | Please provide as much information as possible, including steps to
8 | reproduce the identified vulnerability if you have them. Also
9 | include in your report the specific URL of the project and the code
10 | in which you found the issue.
11 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM scratch
2 |
3 | COPY --from=alpine /etc/ssl/certs/ /etc/ssl/certs/
4 |
5 | COPY bootstrap ./hearchco
6 |
7 | # "nobody" user
8 | USER 65534
9 |
10 | ENTRYPOINT ["./hearchco"]
11 |
12 | EXPOSE 8000
13 |
14 | LABEL org.opencontainers.image.source="https://github.com/hearchco/agent"
15 |
--------------------------------------------------------------------------------
/docs/example_category.json:
--------------------------------------------------------------------------------
1 | {
2 | "engines": {
3 | "google": {
4 | "enabled": true,
5 | "required": false,
6 | "requiredbyorigin": true,
7 | "preferred": false,
8 | "preferredbyorigin": false
9 | },
10 | "bing": {
11 | "enabled": true,
12 | "required": false,
13 | "requiredbyorigin": true,
14 | "preferred": false,
15 | "preferredbyorigin": false
16 | },
17 | "brave": {
18 | "enabled": true,
19 | "required": false,
20 | "requiredbyorigin": false,
21 | "preferred": true,
22 | "preferredbyorigin": false
23 | }
24 | },
25 | "ranking": {
26 | "rankexp": 0.5,
27 | "rankmul": 1,
28 | "rankconst": 0,
29 | "rankscoremul": 1,
30 | "rankscoreadd": 0,
31 | "timesreturnedmul": 1,
32 | "timesreturnedadd": 0,
33 | "timesreturnedscoremul": 1,
34 | "timesreturnedscoreadd": 0,
35 | "engines": {
36 | "google": {
37 | "mul": 1,
38 | "add": 0
39 | },
40 | "bing": {
41 | "mul": 1,
42 | "add": 0
43 | },
44 | "brave": {
45 | "mul": 1,
46 | "add": 0
47 | }
48 | }
49 | },
50 | "timings": {
51 | "preferredtimeout": "500",
52 | "hardtimeout": "1500"
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/docs/hearchco.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Agent for Hearchco Metasearch engine built using Go
3 | After=network.target
4 |
5 | [Install]
6 | WantedBy=multi-user.target
7 |
8 | [Service]
9 | # Find & Replace User/Group/Path with your own
10 | User=hearchco
11 | Group=hearchco
12 | Type=simple
13 | ExecStart=/opt/hearchco --verbosity --config-path="hearchco.yaml"
14 | WorkingDirectory=/opt/hearchco
15 | TimeoutStopSec=20
16 | KillMode=process
17 | Restart=on-failure
18 |
19 | # See https://www.freedesktop.org/software/systemd/man/systemd.exec.html
20 | DevicePolicy=closed
21 | NoNewPrivileges=yes
22 | PrivateTmp=yes
23 | #PrivateUsers=yes
24 | ProtectControlGroups=yes
25 | ProtectKernelModules=yes
26 | ProtectKernelTunables=yes
27 | RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
28 | RestrictNamespaces=yes
29 | RestrictRealtime=yes
30 | SystemCallFilter=~@clock @debug @module @mount @obsolete @reboot @setuid @swap
31 | #ReadWritePaths=/opt/hearchco
32 |
33 | # Prevent from accessing any real (physical) devices
34 | PrivateDevices=yes
35 |
36 | # `strict` keeps the service from writing anything on your filesystem outside of
37 | # $ReadWritePaths. You can change the following line to `full` if that is too restrictive.
38 | ProtectSystem=strict
39 |
40 | # You can comment out the following line if hearchco needs access to anything in /home/*.
41 | # While it is set, hearchco is prevented from ever reading/writing anything there.
42 | ProtectHome=true
43 |
--------------------------------------------------------------------------------
/generate/enginer/structs.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "go/ast"
6 | "go/types"
7 | )
8 |
9 | // Value represents a declared constant together with the metadata the generator keeps about it.
10 | type Value struct {
11 | originalName string // The name of the constant as declared.
12 | name string // The name with trimmed prefix; validConst lowercases it and checks for a directory of that name.
13 | // The value is stored as a bit pattern alone. The signed flag tells us
14 | // whether to interpret it as an int64 or a uint64; the only place
15 | // this matters is when sorting.
16 | // Much of the time the str field is all we need; it is printed
17 | // by Value.String.
18 | value uint64 // Bit pattern of the constant; will be converted to int64 when needed.
19 | signed bool // Whether the constant is a signed type.
20 | str string // The string representation given by the "go/constant" package.
21 | interfaces []string // The interfaces that the constant implements; validInterfacer looks up the lowercased interface name here.
22 | }
23 |
24 | // Generator holds the state of the analysis. Primarily used to buffer
25 | // the output for format.Source.
26 | type Generator struct {
27 | buf bytes.Buffer // Accumulated output.
28 | pkg *Package // Package we are scanning.
29 |
30 | trimPrefix string // NOTE(review): presumably the prefix stripped from constant names to produce Value.name — confirm.
31 |
32 | logf func(format string, args ...interface{}) // Test logging hook; nil when not testing.
33 | }
34 |
35 | // File holds a single parsed file and the per-type data collected from it.
36 | type File struct {
37 | pkg *Package // Package to which this file belongs.
38 | file *ast.File // Parsed AST.
39 | // These fields are reset for each type being generated.
40 | typeName string // Name of the constant type.
41 | values []Value // Accumulator for constant values of that type.
42 |
43 | trimPrefix string // NOTE(review): presumably the prefix stripped from constant names to produce Value.name — confirm.
44 | }
45 |
46 | type Package struct { // Package groups the files of one scanned package with its identifier definitions.
47 | name string // NOTE(review): presumably the package name — confirm where it is set.
48 | defs map[*ast.Ident]types.Object // Objects keyed by identifier; presumably the type checker's definitions — confirm.
49 | files []*File // The package's files, each wrapped in a File.
50 | }
51 |
--------------------------------------------------------------------------------
/generate/enginer/util.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "log"
5 | "os"
6 | "slices"
7 | "strings"
8 | )
9 |
10 | func validConst(v Value) bool { // validConst reports whether v is a usable constant: not "undefined" and backed by a directory.
11 | lowerName := strings.ToLower(v.name)
12 | return lowerName != "undefined" && isDirectory(lowerName) // The name is used as a relative path, so the result depends on the working directory.
13 | }
14 |
15 | func validInterfacer(v Value, interfaceName string) bool { // validInterfacer reports whether v lists interfaceName among its interfaces.
16 | return slices.Contains(v.interfaces, strings.ToLower(interfaceName)) // Only interfaceName is lowercased; entries of v.interfaces are compared as stored.
17 | }
18 |
19 | // isDirectory reports whether path names a directory. Any os.Stat error (for example a missing path) yields false.
20 | func isDirectory(path string) bool {
21 | info, err := os.Stat(path)
22 | if err != nil {
23 | return false
24 | }
25 | return info.IsDir()
26 | }
27 |
28 | func isDirectoryFatal(path string) bool { // isDirectoryFatal reports whether path names a directory, exiting via log.Fatal if it cannot be stat'ed.
29 | info, err := os.Stat(path)
30 | if err != nil {
31 | log.Fatal(err)
32 | // ^FATAL: log.Fatal exits the program, so execution never continues past this point.
33 | }
34 | return info.IsDir()
35 | }
36 |
--------------------------------------------------------------------------------
/generate/exchanger/structs.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "go/ast"
6 | "go/types"
7 | )
8 |
9 | // Value represents a declared constant together with the metadata the generator keeps about it.
10 | type Value struct {
11 | originalName string // The name of the constant as declared.
12 | name string // The name with trimmed prefix; validConst lowercases it and checks for a directory of that name.
13 | // The value is stored as a bit pattern alone. The signed flag tells us
14 | // whether to interpret it as an int64 or a uint64; the only place
15 | // this matters is when sorting.
16 | // Much of the time the str field is all we need; it is printed
17 | // by Value.String.
18 | value uint64 // Bit pattern of the constant; will be converted to int64 when needed.
19 | signed bool // Whether the constant is a signed type.
20 | str string // The string representation given by the "go/constant" package.
21 | interfaces []string // The interfaces that the constant implements.
22 | }
23 |
24 | // Generator holds the state of the analysis. Primarily used to buffer
25 | // the output for format.Source.
26 | type Generator struct {
27 | buf bytes.Buffer // Accumulated output.
28 | pkg *Package // Package we are scanning.
29 |
30 | trimPrefix string // NOTE(review): presumably the prefix stripped from constant names to produce Value.name — confirm.
31 |
32 | logf func(format string, args ...interface{}) // Test logging hook; nil when not testing.
33 | }
34 |
35 | // File holds a single parsed file and the per-type data collected from it.
36 | type File struct {
37 | pkg *Package // Package to which this file belongs.
38 | file *ast.File // Parsed AST.
39 | // These fields are reset for each type being generated.
40 | typeName string // Name of the constant type.
41 | values []Value // Accumulator for constant values of that type.
42 |
43 | trimPrefix string // NOTE(review): presumably the prefix stripped from constant names to produce Value.name — confirm.
44 | }
45 |
46 | type Package struct { // Package groups the files of one scanned package with its identifier definitions.
47 | name string // NOTE(review): presumably the package name — confirm where it is set.
48 | defs map[*ast.Ident]types.Object // Objects keyed by identifier; presumably the type checker's definitions — confirm.
49 | files []*File // The package's files, each wrapped in a File.
50 | }
51 |
--------------------------------------------------------------------------------
/generate/exchanger/util.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "log"
5 | "os"
6 | "strings"
7 | )
8 |
9 | func validConst(v Value) bool { // validConst reports whether v is a usable constant: not "undefined" and backed by a directory.
10 | lowerName := strings.ToLower(v.name)
11 | return lowerName != "undefined" && isDirectory(lowerName) // The name is used as a relative path, so the result depends on the working directory.
12 | }
13 |
14 | // isDirectory reports whether path names a directory. Any os.Stat error (for example a missing path) yields false.
15 | func isDirectory(path string) bool {
16 | info, err := os.Stat(path)
17 | if err != nil {
18 | return false
19 | }
20 | return info.IsDir()
21 | }
22 |
23 | func isDirectoryFatal(path string) bool { // isDirectoryFatal reports whether path names a directory, exiting via log.Fatal if it cannot be stat'ed.
24 | info, err := os.Stat(path)
25 | if err != nil {
26 | log.Fatal(err)
27 | // ^FATAL: log.Fatal exits the program, so execution never continues past this point.
28 | }
29 | return info.IsDir()
30 | }
31 |
--------------------------------------------------------------------------------
/goreleaser/develop.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | # .goreleaser.yml
4 | project_name: hearchco
5 |
6 | # Build
7 | builds:
8 | - env: [CGO_ENABLED=0]
9 | goos:
10 | - linux
11 | main: ./src
12 | binary: bootstrap
13 | goarch:
14 | - amd64
15 | - arm64
16 | - arm
17 | goarm:
18 | - 7
19 | ldflags:
20 | - -s -w
21 | - -X "main.Version=v{{ .Version }}"
22 | - -X "main.GitCommit={{ .ShortCommit }}"
23 | - -X "main.Timestamp={{ .Timestamp }}"
24 | flags:
25 | - -trimpath
26 |
27 | # MacOS Universal Binaries
28 | universal_binaries:
29 | - replace: true
30 |
31 | # Archive
32 | archives:
33 | - name_template: "{{ .ProjectName }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
34 | format: "binary"
35 |
36 | # Checksum
37 | checksum:
38 | name_template: "checksums.txt"
39 | algorithm: sha512
40 |
41 | # Changelog
42 | changelog:
43 | filters:
44 | exclude:
45 | - "^chore:"
46 | - "^ci:"
47 | - "^docs:"
48 | - "^test:"
49 | - "^merge:"
50 | - "^Merge branch"
51 |
52 | # Docker
53 | dockers:
54 | - image_templates:
55 | - "{{ .Env.REGISTRY }}/{{ .Env.IMAGE_NAME }}:v{{ .Version }}-amd64"
56 | use: buildx
57 | goarch: amd64
58 | dockerfile: ./docker/Dockerfile
59 | build_flag_templates:
60 | - --platform=linux/amd64
61 | extra_files:
62 | - docker
63 |
64 | - image_templates:
65 | - "{{ .Env.REGISTRY }}/{{ .Env.IMAGE_NAME }}:v{{ .Version }}-arm64"
66 | use: buildx
67 | goarch: arm64
68 | dockerfile: ./docker/Dockerfile
69 | build_flag_templates:
70 | - --platform=linux/arm64
71 | extra_files:
72 | - docker
73 |
74 | - image_templates:
75 | - "{{ .Env.REGISTRY }}/{{ .Env.IMAGE_NAME }}:v{{ .Version }}-armv7"
76 | use: buildx
77 | goarch: arm
78 | goarm: 7
79 | dockerfile: ./docker/Dockerfile
80 | build_flag_templates:
81 | - --platform=linux/arm/v7
82 | extra_files:
83 | - docker
84 |
--------------------------------------------------------------------------------
/hearchco_example.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | frontendurls: http://localhost:5173,https://*hearch.co
3 | cache:
4 | type: none
5 | imageproxy:
6 | secretkey: changemepls
7 | # engines:
8 | # google:
9 | # noweb: true # Disables web search for this engine
10 | # noimages: true # Disables image search for this engine
11 | # nosuggestions: true # Disables suggestions for this engine
12 |
--------------------------------------------------------------------------------
/scripts/test-dynamodb-docker.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Runs the dynamodb package tests against a throwaway local DynamoDB container (docker).
3 | export AWS_REGION=hearchco-test-1
4 | export AWS_ACCESS_KEY_ID=hearchco
5 | export AWS_SECRET_ACCESS_KEY=hearchco
6 | export DYNAMODB_TABLE=hearchco_test
7 | export DYNAMODB_ENDPOINT=http://localhost:8000
8 | # Start the container, give it 5 seconds to come up, create the table with TTL enabled, then run the tests.
9 | docker run --rm --name hearchco-dynamodb -d -p 8000:8000 docker.io/amazon/dynamodb-local && \
10 | sleep 5 && \
11 | aws dynamodb create-table \
12 | --table-name $DYNAMODB_TABLE \
13 | --attribute-definitions AttributeName=Key,AttributeType=S \
14 | --key-schema AttributeName=Key,KeyType=HASH \
15 | --billing-mode PAY_PER_REQUEST \
16 | --endpoint-url $DYNAMODB_ENDPOINT && \
17 | aws dynamodb update-time-to-live \
18 | --table-name $DYNAMODB_TABLE \
19 | --time-to-live-specification "Enabled=true, AttributeName=TTL" \
20 | --endpoint-url $DYNAMODB_ENDPOINT && \
21 | go test $(go list ./... | grep /dynamodb) -count=1
22 | status=$? # Remember the result of the chain above so the cleanup below cannot mask a failure.
23 | docker stop hearchco-dynamodb; exit $status
--------------------------------------------------------------------------------
/scripts/test-dynamodb-podman.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the DynamoDB cache tests against a throwaway dynamodb-local container (Podman).

# Dummy credentials and the local endpoint used by the aws CLI calls below.
export AWS_REGION=hearchco-test-1
export AWS_ACCESS_KEY_ID=hearchco
export AWS_SECRET_ACCESS_KEY=hearchco
export DYNAMODB_TABLE=hearchco_test
export DYNAMODB_ENDPOINT=http://localhost:8000

# Without a running container there is nothing to test against.
podman run --rm --name hearchco-dynamodb -d -p 8000:8000 docker.io/amazon/dynamodb-local || exit 1

# Stop the container on every exit path. An EXIT trap leaves the script's exit
# status untouched, so a failing step is reported to the caller instead of
# being masked by a successful `podman stop`.
trap 'podman stop hearchco-dynamodb' EXIT

# Give the container a moment to start, create the table, enable TTL, run the tests.
sleep 5 && \
aws dynamodb create-table \
  --table-name "$DYNAMODB_TABLE" \
  --attribute-definitions AttributeName=Key,AttributeType=S \
  --key-schema AttributeName=Key,KeyType=HASH \
  --billing-mode PAY_PER_REQUEST \
  --endpoint-url "$DYNAMODB_ENDPOINT" && \
aws dynamodb update-time-to-live \
  --table-name "$DYNAMODB_TABLE" \
  --time-to-live-specification "Enabled=true, AttributeName=TTL" \
  --endpoint-url "$DYNAMODB_ENDPOINT" && \
go test $(go list ./... | grep /dynamodb) -count=1
--------------------------------------------------------------------------------
/scripts/test-dynamodb.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | go test $(go list ./... | grep /dynamodb) -count=1
3 |
--------------------------------------------------------------------------------
/scripts/test-engines.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | go test $(go list ./... | grep /engines/)
3 |
--------------------------------------------------------------------------------
/scripts/test-redis-docker.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the Redis cache tests against a throwaway Redis container (Docker).

# Without a running container there is nothing to test against.
docker run --rm --name hearchco-redis -d -p 6379:6379 docker.io/library/redis || exit 1

# Stop the container on every exit path. An EXIT trap leaves the script's exit
# status untouched, so a failing `go test` is reported to the caller instead of
# being masked by a successful `docker stop`.
trap 'docker stop hearchco-redis' EXIT

go test $(go list ./... | grep /redis) -count=1
--------------------------------------------------------------------------------
/scripts/test-redis-podman.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the Redis cache tests against a throwaway Redis container (Podman).

# Without a running container there is nothing to test against.
podman run --rm --name hearchco-redis -d -p 6379:6379 docker.io/library/redis || exit 1

# Stop the container on every exit path. An EXIT trap leaves the script's exit
# status untouched, so a failing `go test` is reported to the caller instead of
# being masked by a successful `podman stop`.
trap 'podman stop hearchco-redis' EXIT

go test $(go list ./... | grep /redis) -count=1
--------------------------------------------------------------------------------
/scripts/test-redis.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | go test $(go list ./... | grep /redis) -count=1
3 |
--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | go test $(go list ./... | grep -v /engines/ | grep -v /redis | grep -v /dynamodb)
3 |
--------------------------------------------------------------------------------
/src/cache/actions_currencies.go:
--------------------------------------------------------------------------------
1 | package cache
2 |
3 | import (
4 | "fmt"
5 | "time"
6 |
7 | "github.com/hearchco/agent/src/exchange/currency"
8 | "github.com/hearchco/agent/src/exchange/engines"
9 | )
10 |
11 | func (db DB) SetCurrencies(base currency.Currency, engs []engines.Name, currencies currency.Currencies, ttl ...time.Duration) error {
12 | key := combineBaseWithExchangeEnginesNames(base, engs)
13 | return db.driver.Set(key, currencies, ttl...)
14 | }
15 |
16 | func (db DB) GetCurrencies(base currency.Currency, engs []engines.Name) (currency.Currencies, error) {
17 | key := combineBaseWithExchangeEnginesNames(base, engs)
18 | var currencies currency.Currencies
19 | err := db.driver.Get(key, ¤cies)
20 | return currencies, err
21 | }
22 |
23 | func (db DB) GetCurrenciesTTL(base currency.Currency, engs []engines.Name) (time.Duration, error) {
24 | key := combineBaseWithExchangeEnginesNames(base, engs)
25 | return db.driver.GetTTL(key)
26 | }
27 |
28 | func combineBaseWithExchangeEnginesNames(base currency.Currency, engs []engines.Name) string {
29 | return fmt.Sprintf("%v_%v", base.String(), combineExchangeEnginesNames(engs))
30 | }
31 |
32 | func combineExchangeEnginesNames(engs []engines.Name) string {
33 | var key string
34 | for i, eng := range engs {
35 | if i == 0 {
36 | key = fmt.Sprintf("%v", eng.String())
37 | } else {
38 | key = fmt.Sprintf("%v_%v", key, eng.String())
39 | }
40 | }
41 | return key
42 | }
43 |
--------------------------------------------------------------------------------
/src/cache/db.go:
--------------------------------------------------------------------------------
1 | package cache
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | "github.com/rs/zerolog/log"
8 |
9 | "github.com/hearchco/agent/src/cache/dynamodb"
10 | "github.com/hearchco/agent/src/cache/nocache"
11 | "github.com/hearchco/agent/src/cache/redis"
12 | "github.com/hearchco/agent/src/config"
13 | )
14 |
// DB is the cache handle used by the rest of the application. It forwards
// every operation to the Driver selected in New.
type DB struct {
	driver Driver
}
18 |
19 | func New(ctx context.Context, cacheConf config.Cache) (DB, error) {
20 | var drv Driver
21 | var err error
22 |
23 | switch cacheConf.Type {
24 | case "redis":
25 | drv, err = redis.New(ctx, cacheConf.KeyPrefix, cacheConf.Redis)
26 | if err != nil {
27 | err = fmt.Errorf("failed creating a redis cache: %w", err)
28 | }
29 | case "dynamodb":
30 | drv, err = dynamodb.New(ctx, cacheConf.KeyPrefix, cacheConf.DynamoDB)
31 | if err != nil {
32 | err = fmt.Errorf("failed creating a dynamodb cache: %w", err)
33 | }
34 | default:
35 | drv, err = nocache.New()
36 | if err != nil {
37 | err = fmt.Errorf("failed creating a nocache: %w", err)
38 | }
39 | log.Warn().Msg("Running without caching!")
40 | }
41 |
42 | return DB{drv}, err
43 | }
44 |
// Close closes the underlying cache driver.
func (db DB) Close() {
	db.driver.Close()
}
48 |
--------------------------------------------------------------------------------
/src/cache/driver.go:
--------------------------------------------------------------------------------
1 | package cache
2 |
3 | import (
4 | "time"
5 | )
6 |
// Driver is the storage backend behind DB.
type Driver interface {
	// Close closes the driver.
	Close()
	// Set writes v under key k; ttl is optional (variadic).
	Set(k string, v any, ttl ...time.Duration) error
	// Get reads the entry for key k into o; callers pass a pointer as o.
	Get(k string, o any) error
	// GetTTL reports the TTL associated with key k.
	GetTTL(k string) (time.Duration, error)
}
13 |
--------------------------------------------------------------------------------
/src/cache/nocache/nocache.go:
--------------------------------------------------------------------------------
1 | package nocache
2 |
3 | import (
4 | "time"
5 | )
6 |
// DRV is the no-op cache driver: it stores nothing and never fails.
type DRV struct{}

// New returns a no-op driver; the error is always nil.
func New() (DRV, error) {
	return DRV{}, nil
}

// Close does nothing.
func (drv DRV) Close() {
}

// Set ignores its arguments and reports success.
func (drv DRV) Set(k string, v any, ttl ...time.Duration) error {
	return nil
}

// Get leaves o untouched and reports success.
func (drv DRV) Get(k string, o any) error {
	return nil
}

// GetTTL always reports a zero TTL and a nil error.
func (drv DRV) GetTTL(k string) (time.Duration, error) {
	var none time.Duration
	return none, nil
}
18 |
--------------------------------------------------------------------------------
/src/cache/nocache/nocache_test.go:
--------------------------------------------------------------------------------
1 | package nocache
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | func TestNew(t *testing.T) {
8 | _, err := New()
9 | if err != nil {
10 | t.Errorf("error creating nocache: %v", err)
11 | }
12 | }
13 |
14 | func TestClose(t *testing.T) {
15 | db, err := New()
16 | if err != nil {
17 | t.Errorf("error creating nocache: %v", err)
18 | }
19 |
20 | db.Close()
21 | }
22 |
23 | func TestSet(t *testing.T) {
24 | db, err := New()
25 | if err != nil {
26 | t.Errorf("error creating nocache: %v", err)
27 | }
28 |
29 | defer db.Close()
30 |
31 | err = db.Set("testkey", "testvalue")
32 | if err != nil {
33 | t.Errorf("error setting key-value pair: %v", err)
34 | }
35 | }
36 |
37 | func TestSetTTL(t *testing.T) {
38 | db, err := New()
39 | if err != nil {
40 | t.Errorf("error creating nocache: %v", err)
41 | }
42 |
43 | defer db.Close()
44 |
45 | err = db.Set("testkey", "testvalue", 1)
46 | if err != nil {
47 | t.Errorf("error setting key-value pair with TTL: %v", err)
48 | }
49 | }
50 |
51 | func TestGet(t *testing.T) {
52 | db, err := New()
53 | if err != nil {
54 | t.Errorf("error creating nocache: %v", err)
55 | }
56 |
57 | defer db.Close()
58 |
59 | err = db.Set("testkey", "testvalue")
60 | if err != nil {
61 | t.Errorf("error setting key-value pair: %v", err)
62 | }
63 |
64 | var value string = "testvalue"
65 | err = db.Get("testkey", &value)
66 | if err != nil {
67 | t.Errorf("error getting value: %v", err)
68 | }
69 |
70 | if value != "testvalue" {
71 | t.Errorf("expected value: testvalue, got: %v", value)
72 | }
73 | }
74 |
75 | func TestGetTTL(t *testing.T) {
76 | db, err := New()
77 | if err != nil {
78 | t.Errorf("error creating nocache: %v", err)
79 | }
80 |
81 | defer db.Close()
82 |
83 | err = db.Set("testkey", "testvalue", 1)
84 | if err != nil {
85 | t.Errorf("error setting key-value pair with TTL: %v", err)
86 | }
87 |
88 | ttl, err := db.GetTTL("testkey")
89 | if err != nil {
90 | t.Errorf("error getting TTL: %v", err)
91 | }
92 |
93 | if ttl != 0 {
94 | t.Errorf("expected TTL: 0, got: %v", ttl)
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/src/cli/flags.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
// Flags holds the command-line options. The struct tags describe how each
// field is parsed: name, default value, environment variable and help text.
type Flags struct {
	Version    versionFlag `name:"version" help:"Print version information and quit"`
	Pretty     bool        `type:"bool" default:"false" env:"HEARCHCO_PRETTY" help:"Make logs pretty"`
	Verbosity  int8        `type:"counter" default:"0" short:"v" env:"HEARCHCO_VERBOSITY" help:"Log level verbosity"`
	ConfigPath string      `type:"path" default:"hearchco.yaml" env:"HEARCHCO_CONFIG_PATH" help:"Config file path"`

	// Profiling switches, embedded into the flag set.
	Profiler
}
11 |
// Profiler groups the boolean switches that control profiling; every switch
// defaults to false.
type Profiler struct {
	ProfilerServe  bool `type:"bool" default:"false" help:"Run the profiler and serve at /debug/pprof/ http endpoint"`
	ProfilerCPU    bool `type:"bool" default:"false" help:"Use cpu profiling"`
	ProfilerHeap   bool `type:"bool" default:"false" help:"Use heap profiling"`
	ProfilerGOR    bool `type:"bool" default:"false" help:"Use goroutine profiling"`
	ProfilerThread bool `type:"bool" default:"false" help:"Use threadcreate profiling"`
	ProfilerAlloc  bool `type:"bool" default:"false" help:"Use alloc profiling"`
	ProfilerBlock  bool `type:"bool" default:"false" help:"Use block profiling"`
	ProfilerMutex  bool `type:"bool" default:"false" help:"Use mutex profiling"`
	ProfilerClock  bool `type:"bool" default:"false" help:"Use clock profiling"`
	ProfilerTrace  bool `type:"bool" default:"false" help:"Use trace profiling"`
}
24 |
--------------------------------------------------------------------------------
/src/cli/setup.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "github.com/alecthomas/kong"
5 | "github.com/rs/zerolog/log"
6 | )
7 |
8 | // Returns flags struct from parsed cli arguments.
9 | func Setup(ver string, timestamp string, commit string) (Flags, string) {
10 | verStruct := version{
11 | ver: ver,
12 | timestamp: timestamp,
13 | commit: commit,
14 | }
15 |
16 | var cli Flags
17 | ctx := kong.Parse(&cli,
18 | kong.Name("hearchco"),
19 | kong.Description("Fastasst metasearch engine"),
20 | kong.UsageOnError(),
21 | kong.ConfigureHelp(kong.HelpOptions{
22 | Summary: true,
23 | Compact: true,
24 | }),
25 | kong.Vars{
26 | "version": verStruct.String(),
27 | },
28 | )
29 |
30 | if err := ctx.Validate(); err != nil {
31 | log.Panic().
32 | Caller().
33 | Err(err).
34 | Msg("Failed parsing cli")
35 | // ^PANIC
36 | }
37 |
38 | return cli, verStruct.String()
39 | }
40 |
--------------------------------------------------------------------------------
/src/cli/version.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/alecthomas/kong"
7 | )
8 |
// versionFlag is the type of the version flag in Flags. Its BeforeApply hook
// prints the version and exits.
type versionFlag string

// Decode accepts any input and does nothing.
func (v versionFlag) Decode(ctx *kong.DecodeContext) error { return nil }

// IsBool always reports true.
// NOTE(review): presumably tells kong the flag takes no value — confirm.
func (v versionFlag) IsBool() bool { return true }

// BeforeApply prints the "version" entry of vars and asks the app to exit
// with status 0.
func (v versionFlag) BeforeApply(app *kong.Kong, vars kong.Vars) error {
	fmt.Println(vars["version"])
	app.Exit(0)
	return nil
}
18 |
// version bundles the release metadata.
type version struct {
	ver       string
	timestamp string
	commit    string
}

// String renders the version as "<ver> (<commit>@<timestamp>)". When ver is
// empty it returns "dev".
func (v version) String() string {
	if v.ver != "" {
		return fmt.Sprintf("%v (%v@%v)", v.ver, v.commit, v.timestamp)
	}
	return "dev"
}
32 |
--------------------------------------------------------------------------------
/src/config/defaults.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "time"
5 |
6 | exchengines "github.com/hearchco/agent/src/exchange/engines"
7 | "github.com/hearchco/agent/src/search/engines"
8 | "github.com/hearchco/agent/src/utils/moretime"
9 | )
10 |
// New returns a Config populated with the built-in default values.
func New() Config {
	return Config{
		Server: Server{
			Environment:  "normal",
			Port:         8000,
			FrontendUrls: []string{"http://localhost:5173"},
			Cache: Cache{
				// Caching is off unless a type is configured.
				Type:      "none",
				KeyPrefix: "HEARCHCO_",
				TTL: TTL{
					Currencies: moretime.Day,
				},
				Redis: Redis{
					Host: "localhost",
					Port: 6379,
				},
				DynamoDB: DynamoDB{
					Table: "hearchco",
				},
			},
			ImageProxy: ImageProxy{
				Timeout: 3 * time.Second,
			},
		},
		// No search type is disabled for any engine by default.
		Engines: EngineConfig{
			NoWeb:         []engines.Name{},
			NoImages:      []engines.Name{},
			NoSuggestions: []engines.Name{},
		},
		Exchange: Exchange{
			BaseCurrency: "EUR",
			// All exchange engines are enabled by default.
			Engines: []exchengines.Name{
				exchengines.CURRENCYAPI,
				exchengines.EXCHANGERATEAPI,
				exchengines.FRANKFURTER,
			},
			Timings: ExchangeTimings{
				HardTimeout: 1 * time.Second,
			},
		},
	}
}
53 |
--------------------------------------------------------------------------------
/src/config/structs_config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
// ReaderConfig is the format in which the config is read from the config file
// and environment variables. The koanf tags name the top-level sections.
type ReaderConfig struct {
	Server    ReaderServer                  `koanf:"server"`
	REngines  map[string]ReaderEngineConfig `koanf:"engines"`
	RExchange ReaderExchange                `koanf:"exchange"`
}
// Config is the typed application configuration: server, search engine and
// exchange settings.
type Config struct {
	Server   Server
	Engines  EngineConfig
	Exchange Exchange
}
14 |
--------------------------------------------------------------------------------
/src/config/structs_engines.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// ReaderEngineConfig is the format in which a single engine's settings are
// read from the config file and environment variables.
// It is used to disable certain search types for an engine. By default, all
// types are enabled.
type ReaderEngineConfig struct {
	NoWeb         bool `koanf:"noweb"`         // Whether this engine is disallowed to do web searches.
	NoImages      bool `koanf:"noimages"`      // Whether this engine is disallowed to do image searches.
	NoSuggestions bool `koanf:"nosuggestions"` // Whether this engine is disallowed to do suggestion searches.
}
14 |
// EngineConfig holds, for each search type, the engines for which that type
// is disabled. By default these slices are empty.
type EngineConfig struct {
	NoWeb         []engines.Name // Engines disallowed to do web searches.
	NoImages      []engines.Name // Engines disallowed to do image searches.
	NoSuggestions []engines.Name // Engines disallowed to do suggestion searches.
}
21 |
--------------------------------------------------------------------------------
/src/config/structs_exchange.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/hearchco/agent/src/exchange/currency"
7 | "github.com/hearchco/agent/src/exchange/engines"
8 | )
9 |
// ReaderExchange is the format in which the exchange section is read from the
// config file and environment variables.
type ReaderExchange struct {
	BaseCurrency string                          `koanf:"basecurrency"`
	REngines     map[string]ReaderExchangeEngine `koanf:"engines"`
	RTimings     ReaderExchangeTimings           `koanf:"timings"`
}
// Exchange is the typed exchange configuration: the base currency, the
// exchange engines to use and the timings.
type Exchange struct {
	BaseCurrency currency.Currency
	Engines      []engines.Name
	Timings      ExchangeTimings
}
21 |
// ReaderExchangeEngine is the format in which a single exchange engine's
// settings are read from the config file and environment variables.
type ReaderExchangeEngine struct {
	// If false, the engine will not be used.
	Enabled bool `koanf:"enabled"`
}
27 |
// ReaderExchangeTimings is the format in which the exchange timings are read
// from the config file and environment variables.
// Values are strings made of a number and a unit.
// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y.
// If unit is not specified, it is assumed to be milliseconds.
type ReaderExchangeTimings struct {
	// Hard timeout after which the exchange is forcefully stopped (even if the engines didn't respond).
	HardTimeout string `koanf:"hardtimeout"`
}
// ExchangeTimings holds the exchange timings as durations.
type ExchangeTimings struct {
	// Hard timeout after which the exchange is forcefully stopped (even if the engines didn't respond).
	HardTimeout time.Duration
}
40 |
--------------------------------------------------------------------------------
/src/exchange/currency/currency.go:
--------------------------------------------------------------------------------
1 | package currency
2 |
3 | import (
4 | "fmt"
5 | "slices"
6 | "strings"
7 |
8 | "github.com/rs/zerolog/log"
9 | )
10 |
// Currency is a currency code.
// Format: ISO 4217 (3-letter code) e.g. CHF, EUR, GBP, USD.
type Currency string

// String returns the code as a plain string.
func (c Currency) String() string {
	return string(c)
}

// Lower returns the code in lowercase.
func (c Currency) Lower() string {
	return strings.ToLower(string(c))
}

// Convert turns curr into an upper-cased Currency. It only checks the length:
// anything that is not exactly 3 bytes long is rejected with an error.
func Convert(curr string) (Currency, error) {
	if len(curr) == 3 {
		return Currency(strings.ToUpper(curr)), nil
	}
	return "", fmt.Errorf("currency code must be 3 characters long")
}
30 |
31 | func ConvertBase(curr string) Currency {
32 | // Hardcoded to ensure all APIs include these currencies and therefore work as expected.
33 | supportedBaseCurrencies := [...]string{"CHF", "EUR", "GBP", "USD"}
34 |
35 | upperCurr := strings.ToUpper(curr)
36 | if !slices.Contains(supportedBaseCurrencies[:], upperCurr) {
37 | log.Panic().
38 | Str("currency", upperCurr).
39 | Msg("unsupported base currency")
40 | // ^PANIC
41 | }
42 |
43 | return Currency(upperCurr)
44 | }
45 |
--------------------------------------------------------------------------------
/src/exchange/currency/map.go:
--------------------------------------------------------------------------------
1 | package currency
2 |
3 | import (
4 | "sync"
5 | )
6 |
// Currencies maps a currency to its rate.
type Currencies map[Currency]float64
8 |
// CurrencyMap collects, per currency, every rate passed to Append. Access to
// the map is guarded by lock.
type CurrencyMap struct {
	currs map[Currency][]float64
	lock  sync.RWMutex
}
13 |
14 | func NewCurrencyMap() CurrencyMap {
15 | return CurrencyMap{
16 | currs: make(map[Currency][]float64),
17 | }
18 | }
19 |
20 | func (c *CurrencyMap) Append(currs Currencies) {
21 | c.lock.Lock()
22 | defer c.lock.Unlock()
23 |
24 | for curr, rate := range currs {
25 | c.currs[curr] = append(c.currs[curr], rate)
26 | }
27 | }
28 |
29 | func (c *CurrencyMap) Extract() Currencies {
30 | c.lock.RLock()
31 | defer c.lock.RUnlock()
32 |
33 | avg := make(Currencies)
34 | for curr, rates := range c.currs {
35 | var sum float64
36 | for _, rate := range rates {
37 | sum += rate
38 | }
39 | avg[curr] = sum / float64(len(rates))
40 | }
41 | return avg
42 | }
43 |
--------------------------------------------------------------------------------
/src/exchange/engines/currencyapi/exchange.go:
--------------------------------------------------------------------------------
1 | package currencyapi
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "net/http"
7 |
8 | "github.com/rs/zerolog/log"
9 |
10 | "github.com/hearchco/agent/src/exchange/currency"
11 | )
12 |
13 | func (e Exchange) Exchange(base currency.Currency) (currency.Currencies, error) {
14 | // Get data from the API.
15 | api := e.apiUrlWithBaseCurrency(base)
16 | resp, err := http.Get(api)
17 | if err != nil {
18 | return nil, fmt.Errorf("failed to get data from %s: %w", api, err)
19 | }
20 |
21 | // Read the response body.
22 | body, err := io.ReadAll(resp.Body)
23 | if err != nil {
24 | return nil, fmt.Errorf("failed to read response body: %w", err)
25 | }
26 |
27 | // Unmarshal the response.
28 | dataRates, err := e.extractRates(string(body), base)
29 | if err != nil {
30 | return nil, fmt.Errorf("failed to extract rates from response: %w", err)
31 | }
32 |
33 | // Check if no rates were found.
34 | if len(dataRates) == 0 {
35 | return nil, fmt.Errorf("no rates found for %s", base)
36 | }
37 |
38 | // Convert the rates to proper currency types with their rates.
39 | rates := make(currency.Currencies, len(dataRates))
40 | for currS, rate := range dataRates {
41 | curr, err := currency.Convert(currS)
42 | if err != nil {
43 | // Non-ISO currencies are expected from this engine.
44 | log.Trace().
45 | Err(err).
46 | Str("currency", currS).
47 | Msg("failed to convert currency")
48 | continue
49 | }
50 | rates[curr] = rate
51 | }
52 |
53 | // Set the base currency rate to 1.
54 | rates[base] = 1
55 |
56 | return rates, nil
57 | }
58 |
--------------------------------------------------------------------------------
/src/exchange/engines/currencyapi/info.go:
--------------------------------------------------------------------------------
1 | package currencyapi
2 |
const (
	// Base URL of the currency API. A request URL is formed by appending
	// "/<base currency>.json" to it.
	// NOTE(review): the URL is pinned to the 2024-03-06 release, so the rates
	// presumably never change — confirm this is intended.
	apiUrl = "https://cdn.jsdelivr.net/npm/@fawazahmed0/currency-api@2024-03-06/v1/currencies"
)
7 |
--------------------------------------------------------------------------------
/src/exchange/engines/currencyapi/json.go:
--------------------------------------------------------------------------------
1 | package currencyapi
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "regexp"
7 | "strings"
8 |
9 | "github.com/hearchco/agent/src/exchange/currency"
10 | )
11 |
12 | // Rates field is named the same as base currency.
13 | func (e Exchange) extractRates(resp string, base currency.Currency) (map[string]float64, error) {
14 | pattern := `"` + base.Lower() + `":\s*{[^}]*}`
15 | regexp := regexp.MustCompile(pattern)
16 | match := regexp.FindString(resp)
17 | if match == "" {
18 | return nil, fmt.Errorf("could not find JSON field for base currency %s", base)
19 | }
20 |
21 | // Remove `"":`` from the match
22 | jsonRates := strings.TrimSpace((match[len(base.Lower())+3:]))
23 |
24 | var rates map[string]float64
25 | if err := json.Unmarshal([]byte(jsonRates), &rates); err != nil {
26 | return nil, fmt.Errorf("could not unmarshal JSON field for base currency %s: %w", base, err)
27 | }
28 |
29 | return rates, nil
30 | }
31 |
--------------------------------------------------------------------------------
/src/exchange/engines/currencyapi/new.go:
--------------------------------------------------------------------------------
1 | package currencyapi
2 |
3 | import (
4 | "github.com/hearchco/agent/src/exchange/currency"
5 | )
6 |
7 | type Exchange struct{}
8 |
9 | func New() Exchange {
10 | return Exchange{}
11 | }
12 |
13 | func (e Exchange) apiUrlWithBaseCurrency(base currency.Currency) string {
14 | return apiUrl + "/" + base.Lower() + ".json"
15 | }
16 |
--------------------------------------------------------------------------------
/src/exchange/engines/currencyapi/note.md:
--------------------------------------------------------------------------------
1 | Includes a lot of currencies (and crypto) that aren't in ISO format, so errors in logs are to be expected.
2 |
--------------------------------------------------------------------------------
/src/exchange/engines/exchanger.go:
--------------------------------------------------------------------------------
1 | package engines
2 |
3 | import (
4 | "github.com/hearchco/agent/src/exchange/currency"
5 | )
6 |
// Exchanger is implemented by every exchange engine.
type Exchanger interface {
	// Exchange returns the rates of other currencies for the given base currency.
	Exchange(base currency.Currency) (currency.Currencies, error)
}
10 |
--------------------------------------------------------------------------------
/src/exchange/engines/exchangerateapi/exchange.go:
--------------------------------------------------------------------------------
1 | package exchangerateapi
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io"
7 | "net/http"
8 |
9 | "github.com/rs/zerolog/log"
10 |
11 | "github.com/hearchco/agent/src/exchange/currency"
12 | )
13 |
14 | func (e Exchange) Exchange(base currency.Currency) (currency.Currencies, error) {
15 | // Get data from the API.
16 | api := e.apiUrlWithBaseCurrency(base)
17 | resp, err := http.Get(api)
18 | if err != nil {
19 | return nil, fmt.Errorf("failed to get data from %s: %w", api, err)
20 | }
21 |
22 | // Read the response body.
23 | body, err := io.ReadAll(resp.Body)
24 | if err != nil {
25 | return nil, fmt.Errorf("failed to read response body: %w", err)
26 | }
27 |
28 | // Unmarshal the response.
29 | var data response
30 | if err := json.Unmarshal(body, &data); err != nil {
31 | return nil, fmt.Errorf("failed to unmarshal response: %w", err)
32 | }
33 |
34 | // Check if no rates were found.
35 | if len(data.Rates) == 0 {
36 | return nil, fmt.Errorf("no rates found for %s", base)
37 | }
38 |
39 | // Convert the rates to proper currency types with their rates.
40 | rates := make(currency.Currencies, len(data.Rates))
41 | for currS, rate := range data.Rates {
42 | curr, err := currency.Convert(currS)
43 | if err != nil {
44 | log.Error().
45 | Err(err).
46 | Str("currency", currS).
47 | Msg("failed to convert currency")
48 | continue
49 | }
50 | rates[curr] = rate
51 | }
52 |
53 | // Set the base currency rate to 1.
54 | rates[base] = 1
55 |
56 | return rates, nil
57 | }
58 |
--------------------------------------------------------------------------------
/src/exchange/engines/exchangerateapi/info.go:
--------------------------------------------------------------------------------
1 | package exchangerateapi
2 |
const (
	// Base URL of the exchange rate API. A request URL is formed by appending
	// "/<base currency>" to it.
	apiUrl = "https://open.er-api.com/v6/latest"
)
7 |
--------------------------------------------------------------------------------
/src/exchange/engines/exchangerateapi/json.go:
--------------------------------------------------------------------------------
1 | package exchangerateapi
2 |
// response is the part of the API reply that is decoded: the "rates" object,
// mapping a currency code to its rate.
type response struct {
	Rates map[string]float64 `json:"rates"`
}
6 |
--------------------------------------------------------------------------------
/src/exchange/engines/exchangerateapi/new.go:
--------------------------------------------------------------------------------
1 | package exchangerateapi
2 |
3 | import (
4 | "github.com/hearchco/agent/src/exchange/currency"
5 | )
6 |
7 | type Exchange struct{}
8 |
9 | func New() Exchange {
10 | return Exchange{}
11 | }
12 |
13 | func (e Exchange) apiUrlWithBaseCurrency(base currency.Currency) string {
14 | return apiUrl + "/" + base.String()
15 | }
16 |
--------------------------------------------------------------------------------
/src/exchange/engines/frankfurter/exchange.go:
--------------------------------------------------------------------------------
1 | package frankfurter
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io"
7 | "net/http"
8 |
9 | "github.com/rs/zerolog/log"
10 |
11 | "github.com/hearchco/agent/src/exchange/currency"
12 | )
13 |
14 | func (e Exchange) Exchange(base currency.Currency) (currency.Currencies, error) {
15 | // Get data from the API.
16 | api := e.apiUrlWithBaseCurrency(base)
17 | resp, err := http.Get(api)
18 | if err != nil {
19 | return nil, fmt.Errorf("failed to get data from %s: %w", api, err)
20 | }
21 |
22 | // Read the response body.
23 | body, err := io.ReadAll(resp.Body)
24 | if err != nil {
25 | return nil, fmt.Errorf("failed to read response body: %w", err)
26 | }
27 |
28 | // Unmarshal the response.
29 | var data response
30 | if err := json.Unmarshal(body, &data); err != nil {
31 | return nil, fmt.Errorf("failed to unmarshal response: %w", err)
32 | }
33 |
34 | // Check if no rates were found.
35 | if len(data.Rates) == 0 {
36 | return nil, fmt.Errorf("no rates found for %s", base)
37 | }
38 |
39 | // Convert the rates to proper currency types with their rates.
40 | rates := make(currency.Currencies, len(data.Rates))
41 | for currS, rate := range data.Rates {
42 | curr, err := currency.Convert(currS)
43 | if err != nil {
44 | log.Error().
45 | Err(err).
46 | Str("currency", currS).
47 | Msg("failed to convert currency")
48 | continue
49 | }
50 | rates[curr] = rate
51 | }
52 |
53 | // Set the base currency rate to 1.
54 | rates[base] = 1
55 |
56 | return rates, nil
57 | }
58 |
--------------------------------------------------------------------------------
/src/exchange/engines/frankfurter/info.go:
--------------------------------------------------------------------------------
1 | package frankfurter
2 |
const (
	// Base URL of the Frankfurter API. A request URL is formed by appending
	// "?from=<base currency>" to it.
	apiUrl = "https://api.frankfurter.app/latest"
)
7 |
--------------------------------------------------------------------------------
/src/exchange/engines/frankfurter/json.go:
--------------------------------------------------------------------------------
1 | package frankfurter
2 |
// response is the part of the API reply that is decoded: the "rates" object,
// mapping a currency code to its rate.
// Rates doesn't include the base currency.
type response struct {
	Rates map[string]float64 `json:"rates"`
}
7 |
--------------------------------------------------------------------------------
/src/exchange/engines/frankfurter/new.go:
--------------------------------------------------------------------------------
1 | package frankfurter
2 |
3 | import (
4 | "github.com/hearchco/agent/src/exchange/currency"
5 | )
6 |
7 | type Exchange struct{}
8 |
9 | func New() Exchange {
10 | return Exchange{}
11 | }
12 |
13 | func (e Exchange) apiUrlWithBaseCurrency(base currency.Currency) string {
14 | return apiUrl + "?from=" + base.String()
15 | }
16 |
--------------------------------------------------------------------------------
/src/exchange/engines/name.go:
--------------------------------------------------------------------------------
1 | package engines
2 |
3 | import (
4 | "strings"
5 | )
6 |
// Name identifies an exchange engine.
type Name int

//go:generate enumer -type=Name -json -text -sql
//go:generate go run github.com/hearchco/agent/generate/exchanger -type=Name -packagename exchange -output ../engine_exchanger.go
const (
	// UNDEFINED is the zero value and does not name a real engine.
	UNDEFINED Name = iota
	CURRENCYAPI
	EXCHANGERATEAPI
	FRANKFURTER
)
17 |
// Names returns the engine names without UNDEFINED, by skipping the first
// entry of _NameValues.
// NOTE(review): _NameValues is defined outside this file (presumably by the
// enumer generator). The result is a sub-slice of it, so callers should
// treat it as read-only.
func Names() []Name {
	return _NameValues[1:]
}
22 |
23 | func (n Name) ToLower() string {
24 | return strings.ToLower(n.String())
25 | }
26 |
--------------------------------------------------------------------------------
/src/logger/setup.go:
--------------------------------------------------------------------------------
1 | package logger
2 |
3 | import (
4 | "os"
5 | "time"
6 |
7 | "github.com/rs/zerolog"
8 | "github.com/rs/zerolog/log"
9 | )
10 |
11 | func Setup(verbosity int8, pretty bool) zerolog.Logger {
12 | // Setup logger.
13 | var l zerolog.Logger
14 | if pretty {
15 | // This is much slower to print.
16 | l = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.Stamp})
17 | } else {
18 | l = zerolog.New(os.Stderr).With().Timestamp().Logger()
19 | }
20 |
21 | // Setup verbosity.
22 | switch {
23 | case verbosity > 1: // TRACE
24 | l = l.With().Caller().Logger().Level(zerolog.TraceLevel)
25 | case verbosity == 1: // DEBUG
26 | l = l.Level(zerolog.DebugLevel)
27 | default: // INFO
28 | l = l.Level(zerolog.InfoLevel)
29 | }
30 |
31 | // Set the logger to be global.
32 | log.Logger = l
33 | return l
34 | }
35 |
--------------------------------------------------------------------------------
/src/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "os"
6 | "os/signal"
7 | "syscall"
8 | _ "time/tzdata"
9 |
10 | "github.com/rs/zerolog/log"
11 |
12 | "github.com/hearchco/agent/src/cache"
13 | "github.com/hearchco/agent/src/cli"
14 | "github.com/hearchco/agent/src/config"
15 | "github.com/hearchco/agent/src/logger"
16 | "github.com/hearchco/agent/src/profiler"
17 | "github.com/hearchco/agent/src/router"
18 | )
19 |
// Build metadata passed to cli.Setup by main. Nothing in this file assigns
// these values — presumably they are injected at build time (e.g. via
// -ldflags); confirm against the build configuration.
20 | var (
21 | // Release variables.
22 | Version string
23 | Timestamp string
24 | GitCommit string
25 | )
26 |
27 | func main() {
28 | // Setup signal interrupt (CTRL+C).
29 | ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
30 | defer cancel()
31 |
32 | // Parse cli flags.
33 | cliFlags, ver := cli.Setup(Version, Timestamp, GitCommit)
34 |
35 | // Configure logger.
36 | lgr := logger.Setup(cliFlags.Verbosity, cliFlags.Pretty)
37 |
38 | // Load config file.
39 | conf := config.New()
40 | conf.Load(cliFlags.ConfigPath)
41 |
42 | // Setup cache database.
43 | db, err := cache.New(ctx, conf.Server.Cache)
44 | if err != nil {
45 | log.Fatal().
46 | Caller().
47 | Err(err).
48 | Msg("Failed creating a new cache database")
49 | // ^FATAL
50 | }
51 | defer db.Close()
52 |
53 | // Start profiler if enabled.
54 | _, stopProfiler := profiler.Run(cliFlags)
55 | defer stopProfiler()
56 |
57 | // Start router.
58 | rw := router.New(lgr, conf, db, cliFlags.ProfilerServe, ver)
59 | switch conf.Server.Environment {
60 | case "lambda":
61 | rw.StartLambda()
62 | default:
63 | rw.Start(ctx)
64 | }
65 |
66 | log.Info().Msg("Program finished")
67 | }
68 |
--------------------------------------------------------------------------------
/src/profiler/run.go:
--------------------------------------------------------------------------------
1 | package profiler
2 |
3 | import (
4 | "github.com/pkg/profile"
5 | "github.com/rs/zerolog/log"
6 |
7 | "github.com/hearchco/agent/src/cli"
8 | )
9 |
// profiler pairs a CLI switch with the pkg/profile option it selects.
10 | type profiler struct {
// enabled is copied from the matching cli.Flags field in Run.
11 | enabled bool
// profile is the option handed to profile.Start when this profiler is chosen.
12 | profile func(p *profile.Profile)
13 | }
14 |
15 | func Run(cliFlags cli.Flags) (bool, func()) {
16 | /*
17 | goroutine — stack traces of all current goroutines
18 | heap — a sampling of memory allocations of live objects
19 | allocs — a sampling of all past memory allocations
20 | threadcreate — stack traces that led to the creation of new OS threads
21 | block — stack traces that led to blocking on synchronization primitives
22 | mutex — stack traces of holders of contended mutexes
23 | */
24 |
25 | profilers := [...]profiler{{
26 | enabled: cliFlags.ProfilerCPU,
27 | profile: profile.CPUProfile,
28 | }, {
29 | enabled: cliFlags.ProfilerHeap,
30 | profile: profile.MemProfileHeap,
31 | }, {
32 | enabled: cliFlags.ProfilerGOR,
33 | profile: profile.GoroutineProfile,
34 | }, {
35 | enabled: cliFlags.ProfilerThread,
36 | profile: profile.ThreadcreationProfile,
37 | }, {
38 | enabled: cliFlags.ProfilerBlock,
39 | profile: profile.BlockProfile,
40 | }, {
41 | enabled: cliFlags.ProfilerAlloc,
42 | profile: profile.MemProfileAllocs,
43 | }, {
44 | enabled: cliFlags.ProfilerMutex,
45 | profile: profile.MutexProfile,
46 | }, {
47 | enabled: cliFlags.ProfilerClock,
48 | profile: profile.ClockProfile,
49 | }, {
50 | enabled: cliFlags.ProfilerTrace,
51 | profile: profile.TraceProfile,
52 | }}
53 |
54 | profilerToRun := profiler{enabled: false}
55 | for _, p := range profilers {
56 | if profilerToRun.enabled && p.enabled {
57 | log.Fatal().
58 | Caller().
59 | Msg("Only one profiler can be run at a time")
60 | // ^FATAL
61 | } else if p.enabled {
62 | profilerToRun = p
63 | }
64 | }
65 | if !profilerToRun.enabled {
66 | return false, func() {}
67 | }
68 |
69 | p := profile.Start(profilerToRun.profile, profile.ProfilePath("./profiling/"), profile.NoShutdownHook)
70 | return true, func() {
71 | p.Stop()
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/router/lambda.go:
--------------------------------------------------------------------------------
1 | package router
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/aws/aws-lambda-go/events"
7 | "github.com/aws/aws-lambda-go/lambda"
8 | chiadapter "github.com/awslabs/aws-lambda-go-api-proxy/chi"
9 | )
10 |
11 | func (rw RouterWrapper) StartLambda() {
12 | lambda.Start(func(ctx context.Context, req events.APIGatewayV2HTTPRequest) (events.APIGatewayV2HTTPResponse, error) {
13 | return chiadapter.NewV2(rw.mux).ProxyWithContextV2(ctx, req)
14 | })
15 | }
16 |
--------------------------------------------------------------------------------
/src/router/middlewares/compress.go:
--------------------------------------------------------------------------------
1 | package middlewares
2 |
3 | import (
4 | "io"
5 | "net/http"
6 |
7 | "github.com/andybalholm/brotli"
8 | "github.com/go-chi/chi/v5/middleware"
9 | "github.com/klauspost/compress/zstd"
10 | "github.com/rs/zerolog/log"
11 | )
12 |
13 | func compress(lvl int, types ...string) func(next http.Handler) http.Handler {
14 | // Already has deflate and gzip.
15 | comp := middleware.NewCompressor(lvl, types...)
16 |
17 | // Add brotli.
18 | comp.SetEncoder("br", func(w io.Writer, lvl int) io.Writer {
19 | return brotli.NewWriterOptions(w, brotli.WriterOptions{
20 | Quality: lvl,
21 | })
22 | })
23 |
24 | // Add zstd.
25 | comp.SetEncoder("zstd", func(w io.Writer, lvl int) io.Writer {
26 | writer, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zstd.EncoderLevel(lvl)))
27 | if err != nil {
28 | log.Panic().Err(err).Msg("Failed to create zstd writer")
29 | }
30 | return writer
31 | })
32 |
33 | return comp.Handler
34 | }
35 |
--------------------------------------------------------------------------------
/src/router/middlewares/logging.go:
--------------------------------------------------------------------------------
1 | package middlewares
2 |
3 | import (
4 | "net/http"
5 | "time"
6 |
7 | "github.com/rs/zerolog"
8 | "github.com/rs/zerolog/hlog"
9 | )
10 |
// ignoredPath reports whether p is exactly equal to one of skipPaths.
func ignoredPath(p string, skipPaths []string) bool {
	matched := false
	for _, candidate := range skipPaths {
		if candidate == p {
			matched = true
			break
		}
	}
	return matched
}
19 |
20 | func zerologMiddleware(lgr zerolog.Logger, skipPaths []string) [](func(http.Handler) http.Handler) {
21 | newHandler := hlog.NewHandler(lgr)
22 | fieldsHandler := hlog.AccessHandler(func(r *http.Request, status int, size int, duration time.Duration) {
23 | // Skip logging for ignored paths.
24 | if ignoredPath(r.URL.Path, skipPaths) {
25 | return
26 | }
27 |
28 | lgr := hlog.FromRequest(r)
29 | event := lgr.Info()
30 | if status >= 500 {
31 | event = lgr.Error()
32 | } else if status >= 400 {
33 | event = lgr.Warn()
34 | }
35 |
36 | event.
37 | Str("method", r.Method).
38 | Str("path", r.URL.Path).
39 | Int("status", status).
40 | Dur("duration", duration).
41 | Str("ip", r.RemoteAddr).
42 | Msg("Request")
43 | })
44 |
45 | return [](func(http.Handler) http.Handler){
46 | newHandler,
47 | fieldsHandler,
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/router/middlewares/setup.go:
--------------------------------------------------------------------------------
1 | package middlewares
2 |
3 | import (
4 | "net/http"
5 | "strings"
6 |
7 | "github.com/go-chi/chi/v5"
8 | "github.com/go-chi/chi/v5/middleware"
9 | "github.com/go-chi/cors"
10 | "github.com/rs/zerolog"
11 | "github.com/rs/zerolog/log"
12 | )
13 |
14 | func Setup(mux *chi.Mux, lgr zerolog.Logger, frontendUrls []string, serveProfiler bool) {
15 | // Use custom zerolog middleware.
16 | skipPaths := []string{"/healthz", "/versionz"}
17 | mux.Use(zerologMiddleware(lgr, skipPaths)...)
18 |
19 | // Use recovery middleware.
20 | mux.Use(middleware.Recoverer)
21 |
22 | // Use compression middleware, except for image proxy since the response is copied over.
23 | mux.Use(middleware.Maybe(compress(3), func(r *http.Request) bool {
24 | return !strings.HasPrefix(r.URL.Path, "/proxy")
25 | }))
26 |
27 | // Use CORS middleware.
28 | mux.Use(cors.Handler(cors.Options{
29 | AllowedOrigins: frontendUrls,
30 | AllowedMethods: []string{"GET", "POST", "OPTIONS"},
31 | AllowedHeaders: []string{
32 | "Accept",
33 | "Accept-Encoding",
34 | "Accept-Language",
35 | "Access-Control-Request-Headers",
36 | "Access-Control-Request-Method",
37 | "Origin",
38 | },
39 | AllowCredentials: false,
40 | MaxAge: 300,
41 | }))
42 |
43 | log.Debug().
44 | Strs("url", frontendUrls).
45 | Msg("Using CORS")
46 |
47 | // Use strip slashes middleware, except for pprof.
48 | mux.Use(middleware.Maybe(middleware.StripSlashes, func(r *http.Request) bool {
49 | return !strings.HasPrefix(r.URL.Path, "/debug")
50 | }))
51 |
52 | // Use pprof router if profiling is enabled.
53 | if serveProfiler {
54 | mux.Mount("/debug", middleware.Profiler())
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/router/router.go:
--------------------------------------------------------------------------------
1 | package router
2 |
3 | import (
4 | "context"
5 | "net/http"
6 | "strconv"
7 | "time"
8 |
9 | "github.com/go-chi/chi/v5"
10 | "github.com/rs/zerolog"
11 | "github.com/rs/zerolog/log"
12 |
13 | "github.com/hearchco/agent/src/cache"
14 | "github.com/hearchco/agent/src/config"
15 | "github.com/hearchco/agent/src/router/middlewares"
16 | "github.com/hearchco/agent/src/router/routes"
17 | )
18 |
19 | type RouterWrapper struct {
20 | mux *chi.Mux
21 | port int
22 | }
23 |
24 | func New(lgr zerolog.Logger, conf config.Config, db cache.DB, serveProfiler bool, version string) RouterWrapper {
25 | mux := chi.NewRouter()
26 |
27 | middlewares.Setup(mux, lgr, conf.Server.FrontendUrls, serveProfiler)
28 | routes.Setup(mux, version, db, conf)
29 |
30 | return RouterWrapper{mux: mux, port: conf.Server.Port}
31 | }
32 |
33 | func (rw RouterWrapper) Start(ctx context.Context) {
34 | // Create server.
35 | srv := http.Server{
36 | Addr: ":" + strconv.Itoa(rw.port),
37 | Handler: rw.mux,
38 | }
39 |
40 | log.Info().
41 | Int("port", rw.port).
42 | Msg("Starting server")
43 |
44 | // Shut down server gracefully on context cancellation.
45 | go func() {
46 | <-ctx.Done()
47 | log.Info().Msg("Shutting down server")
48 |
49 | // Create a context with timeout of 5 seconds.
50 | timeout, cancel := context.WithTimeout(context.Background(), 5*time.Second)
51 | defer cancel()
52 |
53 | // Shutdown gracefully.
54 | // After the timeout is reached, server will be shut down forcefully.
55 | err := srv.Shutdown(timeout)
56 | if err != nil {
57 | log.Error().
58 | Caller().
59 | Err(err).
60 | Msg("Server shut down failed")
61 | } else {
62 | log.Info().
63 | Msg("Server shut down")
64 | }
65 | }()
66 |
67 | // Start server.
68 | err := srv.ListenAndServe()
69 | if err != nil && err != http.ErrServerClosed {
70 | log.Fatal().
71 | Caller().
72 | Err(err).
73 | Msg("Failed to start server")
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/router/routes/params.go:
--------------------------------------------------------------------------------
1 | package routes
2 |
3 | import (
4 | "net/url"
5 | )
6 |
// getParamOrDefault returns the first value stored under key in params. If
// that value is empty (or the key is absent) and at least one fallback was
// supplied, the first fallback is returned instead.
func getParamOrDefault(params url.Values, key string, fallback ...string) string {
	if val := params.Get(key); val != "" || len(fallback) == 0 {
		return val
	}
	return fallback[0]
}
14 |
--------------------------------------------------------------------------------
/src/router/routes/responses.go:
--------------------------------------------------------------------------------
1 | package routes
2 |
3 | import (
4 | "github.com/hearchco/agent/src/exchange/currency"
5 | "github.com/hearchco/agent/src/search/result"
6 | )
7 |
// ErrorResponse is the JSON shape of an error reply: a message and a value.
8 | type ErrorResponse struct {
9 | Message string `json:"message"`
10 | Value string `json:"value"`
11 | }
12 |
// responseBase holds the fields shared by the response types below, which
// embed it so the fields appear at the top level of the JSON.
13 | type responseBase struct {
14 | Version string `json:"version"`
// routeCurrencies stores the elapsed handling time in milliseconds here.
15 | Duration int64 `json:"duration"`
16 | }
17 |
// ResultsResponse is the JSON reply carrying search results.
18 | type ResultsResponse struct {
19 | responseBase
20 |
21 | Results []result.ResultOutput `json:"results"`
22 | }
23 |
// SuggestionsResponse is the JSON reply carrying suggestions.
24 | type SuggestionsResponse struct {
25 | responseBase
26 |
27 | Suggestions []result.Suggestion `json:"suggestions"`
28 | }
29 |
// ExchangeResponse is the JSON reply for a currency conversion.
30 | type ExchangeResponse struct {
31 | responseBase
32 |
33 | Base currency.Currency `json:"base"`
34 | From currency.Currency `json:"from"`
35 | To currency.Currency `json:"to"`
36 | Amount float64 `json:"amount"`
37 | Result float64 `json:"result"`
38 | }
39 |
// CurrenciesResponse is the JSON reply written by routeCurrencies: the base
// currency and the currencies known to the exchange.
40 | type CurrenciesResponse struct {
41 | responseBase
42 |
43 | Base currency.Currency `json:"base"`
44 | Currencies currency.Currencies `json:"currencies"`
45 | }
46 |
--------------------------------------------------------------------------------
/src/router/routes/route_currencies.go:
--------------------------------------------------------------------------------
1 | package routes
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "time"
7 |
8 | "github.com/hearchco/agent/src/cache"
9 | "github.com/hearchco/agent/src/config"
10 | "github.com/hearchco/agent/src/exchange"
11 | "github.com/rs/zerolog/log"
12 | )
13 |
14 | func routeCurrencies(w http.ResponseWriter, ver string, conf config.Exchange, db cache.DB, ttl time.Duration) error {
15 | // Capture start time.
16 | startTime := time.Now()
17 |
18 | // Get the cached currencies.
19 | currencies, err := db.GetCurrencies(conf.BaseCurrency, conf.Engines)
20 | if err != nil {
21 | log.Error().
22 | Err(err).
23 | Str("base", conf.BaseCurrency.String()).
24 | Str("engines", fmt.Sprintf("%v", conf.Engines)).
25 | Msg("Error while getting currencies from cache")
26 | }
27 |
28 | // Create the exchange.
29 | var exch exchange.Exchange
30 | if currencies == nil {
31 | // Fetch the currencies from the enabled engines.
32 | exch = exchange.NewExchange(conf)
33 | // Cache the currencies if any have been fetched.
34 | if len(exch.Currencies()) > 0 {
35 | err := db.SetCurrencies(conf.BaseCurrency, conf.Engines, exch.Currencies(), ttl)
36 | if err != nil {
37 | log.Error().
38 | Err(err).
39 | Str("base", conf.BaseCurrency.String()).
40 | Str("engines", fmt.Sprintf("%v", conf.Engines)).
41 | Msg("Error while setting currencies in cache")
42 | }
43 | }
44 | } else {
45 | // Use the cached currencies.
46 | exch = exchange.NewExchange(conf, currencies)
47 | }
48 |
49 | return writeResponseJSON(w, http.StatusOK, CurrenciesResponse{
50 | responseBase{
51 | ver,
52 | time.Since(startTime).Milliseconds(),
53 | },
54 | conf.BaseCurrency,
55 | exch.Currencies(),
56 | })
57 | }
58 |
--------------------------------------------------------------------------------
/src/router/routes/writers.go:
--------------------------------------------------------------------------------
1 | package routes
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io"
7 | "net/http"
8 | )
9 |
// writeResponse sends the status code followed by body as the response
// payload. It returns the error from writing the body, if any.
func writeResponse(w http.ResponseWriter, status int, body string) error {
	w.WriteHeader(status)
	if _, err := w.Write([]byte(body)); err != nil {
		return err
	}
	return nil
}
15 |
// writeResponseJSON marshals body to JSON and writes it with the given status
// and a Content-Type of "application/json".
//
// If marshalling fails, a 500 status with the text "internal server error" is
// written instead and the marshal error is returned; should that write fail as
// well, both errors are returned wrapped together.
func writeResponseJSON(w http.ResponseWriter, status int, body any) error {
	payload, marshalErr := json.Marshal(body)
	if marshalErr == nil {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		_, writeErr := w.Write(payload)
		return writeErr
	}

	// Marshalling failed: tell the client and report the error.
	w.WriteHeader(http.StatusInternalServerError)
	if _, writeErr := w.Write([]byte("internal server error")); writeErr != nil {
		return fmt.Errorf("%w: %w", writeErr, marshalErr)
	}
	return marshalErr
}
32 |
// writeResponseSuggestions writes the two-element JSON array
// [query, suggestions] with the given status and a Content-Type of
// "application/x-suggestions+json".
//
// If marshalling fails, a 500 status with the text "internal server error" is
// written instead and the marshal error is returned; should that write fail as
// well, both errors are returned wrapped together.
func writeResponseSuggestions(w http.ResponseWriter, status int, query string, suggestions []string) error {
	payload, marshalErr := json.Marshal([2]any{query, suggestions})
	if marshalErr == nil {
		w.Header().Set("Content-Type", "application/x-suggestions+json")
		w.WriteHeader(status)
		_, writeErr := w.Write(payload)
		return writeErr
	}

	// Marshalling failed: tell the client and report the error.
	w.WriteHeader(http.StatusInternalServerError)
	if _, writeErr := w.Write([]byte("internal server error")); writeErr != nil {
		return fmt.Errorf("%w: %w", writeErr, marshalErr)
	}
	return marshalErr
}
50 |
// writeResponseImageProxy relays an upstream response to w: it copies the
// Content-Encoding, Content-Length and Content-Type headers when they are
// set, writes the upstream status code and then streams the upstream body.
// The returned error is the one from copying the body.
func writeResponseImageProxy(w http.ResponseWriter, resp *http.Response) error {
	// Only these headers are passed through, and only when non-empty.
	forwarded := [...]string{"Content-Encoding", "Content-Length", "Content-Type"}
	for _, name := range forwarded {
		if val := resp.Header.Get(name); val != "" {
			w.Header().Set(name, val)
		}
	}

	w.WriteHeader(resp.StatusCode)
	_, err := io.Copy(w, resp.Body)
	return err
}
68 |
--------------------------------------------------------------------------------
/src/search/category/disabled.go:
--------------------------------------------------------------------------------
1 | package category
2 |
3 | import (
4 | "slices"
5 |
6 | "github.com/hearchco/agent/src/search/engines"
7 | )
8 |
9 | // Returns true if the category contains any disabled engines.
10 | // Otherwise, returns false.
11 | func (c Category) ContainsDisabledEngines(disabledEngines []engines.Name) bool {
12 | for _, eng := range disabledEngines {
13 | if slices.Contains(c.Engines, eng) {
14 | return true
15 | }
16 | }
17 |
18 | return false
19 | }
20 |
--------------------------------------------------------------------------------
/src/search/category/type.go:
--------------------------------------------------------------------------------
1 | package category
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/hearchco/agent/src/search/engines"
7 | )
8 |
// Category describes one search category: the engines it uses, split into
// required/preferred groups, plus its ranking and timing settings.
9 | type Category struct {
10 | Engines []engines.Name
11 | RequiredEngines []engines.Name
12 | RequiredByOriginEngines []engines.Name
13 | PreferredEngines []engines.Name
14 | PreferredByOriginEngines []engines.Name
15 | Ranking Ranking
16 | Timings Timings
17 | }
18 |
// Ranking holds the coefficients used when scoring results.
19 | type Ranking struct {
20 | // The exponent, multiplier and addition used on the rank itself.
21 | RankExp float64
22 | RankMul float64
23 | RankAdd float64
24 | // The multiplier and addition used on the rank score (number calculated from dividing 100 with the rank + above variables applied).
25 | RankScoreMul float64
26 | RankScoreAdd float64
27 | // The multiplier and addition used on the number of times the result was returned.
28 | TimesReturnedMul float64
29 | TimesReturnedAdd float64
30 | // The multiplier and addition used on the times returned score (number calculated from doing log(timesReturnedNum + above variables applied)).
31 | TimesReturnedScoreMul float64
32 | TimesReturnedScoreAdd float64
33 | // Multipliers and additions for each engine, applied to the rank score.
34 | Engines map[engines.Name]EngineRanking
35 | }
36 |
// EngineRanking is the per-engine multiplier and addition stored in Ranking.Engines.
37 | type EngineRanking struct {
38 | Mul float64
39 | Add float64
40 | }
41 |
// Timings holds the timeouts of a category's search.
42 | type Timings struct {
43 | // Maximum amount of time to wait for the PreferredEngines (or ByOrigin) to respond.
44 | // If the search is still waiting for the RequiredEngines (or ByOrigin) after this time, the search will continue.
45 | PreferredTimeout time.Duration
46 | // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond).
47 | HardTimeout time.Duration
48 | }
49 |
--------------------------------------------------------------------------------
/src/search/context_cancel.go:
--------------------------------------------------------------------------------
1 | package search
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "sync"
7 | "time"
8 |
9 | "github.com/rs/zerolog/log"
10 |
11 | "github.com/hearchco/agent/src/search/engines"
12 | "github.com/hearchco/agent/src/utils/anonymize"
13 | )
14 |
15 | // Hard timeout is associated with the required engines.
16 | func cancelHardTimeout(start time.Time, cancel context.CancelFunc, query string, wgEngs *sync.WaitGroup, engs []engines.Name, wgByOriginEngs *sync.WaitGroup, byOriginEngs []engines.Name) {
17 | groupNames := [...]string{groupRequired, groupRequiredByOrigin}
18 | cancelTimeout(groupNames, start, cancel, query, wgEngs, engs, wgByOriginEngs, byOriginEngs)
19 | }
20 |
21 | // Preferred timeout is associated with the preferred engines.
22 | func cancelPreferredTimeout(start time.Time, cancel context.CancelFunc, query string, wgEngs *sync.WaitGroup, engs []engines.Name, wgByOriginEngs *sync.WaitGroup, byOriginEngs []engines.Name) {
23 | groupNames := [...]string{groupPreferred, groupPreferredByOrigin}
24 | cancelTimeout(groupNames, start, cancel, query, wgEngs, engs, wgByOriginEngs, byOriginEngs)
25 | }
26 |
27 | // Cancel timeout for the provided engines.
28 | func cancelTimeout(groupNames [2]string, start time.Time, cancel context.CancelFunc, query string, wgEngs *sync.WaitGroup, engs []engines.Name, wgByOriginEngs *sync.WaitGroup, byOriginEngs []engines.Name) {
29 | var wg sync.WaitGroup
30 |
31 | // Wait for all required engines to finish.
32 | wg.Add(1)
33 | go func() {
34 | defer wg.Done()
35 | wgEngs.Wait()
36 | log.Debug().
37 | Str("query", anonymize.String(query)).
38 | Str("group", groupNames[0]).
39 | Str("engines", fmt.Sprintf("%v", engs)).
40 | Dur("duration", time.Since(start)).
41 | Msg("Scraping group finished")
42 | }()
43 |
44 | // Wait for all required by origin engines to finish.
45 | wg.Add(1)
46 | go func() {
47 | defer wg.Done()
48 | wgByOriginEngs.Wait()
49 | log.Debug().
50 | Str("query", anonymize.String(query)).
51 | Str("group", groupNames[1]).
52 | Str("engines", fmt.Sprintf("%v", byOriginEngs)).
53 | Dur("duration", time.Since(start)).
54 | Msg("Scraping group finished")
55 | }()
56 |
57 | wg.Wait()
58 | cancel()
59 | }
60 |
--------------------------------------------------------------------------------
/src/search/engines/_engines_test/s_images.go:
--------------------------------------------------------------------------------
1 | package _engines_test
2 |
3 | import (
4 | "context"
5 | "strings"
6 | "testing"
7 |
8 | "github.com/hearchco/agent/src/search/result"
9 | "github.com/hearchco/agent/src/search/scraper"
10 | )
11 |
12 | func CheckImageSearch(t *testing.T, e scraper.ImageSearcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) {
13 | // TestCaseHasAnyResults
14 | for _, tc := range tchar {
15 | e.InitSearcher(context.Background())
16 |
17 | resChan := make(chan result.ResultScraped, 100)
18 | go e.ImageSearch(tc.Query, tc.Options, resChan)
19 |
20 | results := make([]result.ResultScraped, 0)
21 | for r := range resChan {
22 | results = append(results, r)
23 | }
24 |
25 | if len(results) == 0 {
26 | defer t.Errorf("Got no results for %q", tc.Query)
27 | }
28 | }
29 |
30 | // TestCaseContainsResults
31 | for _, tc := range tccr {
32 | e.InitSearcher(context.Background())
33 |
34 | resChan := make(chan result.ResultScraped, 100)
35 | go e.ImageSearch(tc.Query, tc.Options, resChan)
36 |
37 | results := make([]result.ResultScraped, 0)
38 | for r := range resChan {
39 | results = append(results, r)
40 | }
41 |
42 | if len(results) == 0 {
43 | defer t.Errorf("Got no results for %q", tc.Query)
44 | } else {
45 | for _, rURL := range tc.ResultURLs {
46 | found := false
47 |
48 | for _, r := range results {
49 | if strings.Contains(r.URL(), rURL) {
50 | found = true
51 | break
52 | }
53 | }
54 |
55 | if !found {
56 | defer t.Errorf("Couldn't find %q (%q).\nThe results: %q", rURL, tc.Query, results)
57 | }
58 | }
59 | }
60 | }
61 |
62 | // TestCaseRankedResults
63 | for _, tc := range tcrr {
64 | e.InitSearcher(context.Background())
65 |
66 | resChan := make(chan result.ResultScraped, 100)
67 | go e.ImageSearch(tc.Query, tc.Options, resChan)
68 |
69 | results := make([]result.ResultScraped, 0)
70 | for r := range resChan {
71 | results = append(results, r)
72 | }
73 |
74 | if len(results) == 0 {
75 | defer t.Errorf("Got no results for %q", tc.Query)
76 | } else if len(results) < len(tc.ResultURLs) {
77 | defer t.Errorf("Number of results is less than test case URLs.")
78 | } else {
79 | for i, rURL := range tc.ResultURLs {
80 | if !strings.Contains(results[i].URL(), rURL) {
81 | defer t.Errorf("Wrong result on rank %q: %q (%q).\nThe results: %q", i+1, rURL, tc.Query, results)
82 | }
83 | }
84 | }
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/src/search/engines/_engines_test/s_suggestions.go:
--------------------------------------------------------------------------------
1 | package _engines_test
2 |
3 | import (
4 | "strings"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/result"
8 | "github.com/hearchco/agent/src/search/scraper"
9 | )
10 |
11 | func CheckSuggest(t *testing.T, e scraper.Suggester, q string) {
12 | sugChan := make(chan result.SuggestionScraped)
13 | go func() {
14 | err, found := e.Suggest(q, NewOpts(), sugChan)
15 | if len(err) > 0 || !found {
16 | t.Errorf("Failed to get suggestions: %v", err)
17 | }
18 | }()
19 |
20 | suggs := make([]string, 0, 10)
21 | for sug := range sugChan {
22 | suggs = append(suggs, sug.Value())
23 | }
24 | if len(suggs) == 0 {
25 | t.Errorf("No suggestions returned")
26 | }
27 |
28 | for _, s := range suggs {
29 | if s == "" {
30 | t.Errorf("Empty suggestion")
31 | } else if !strings.Contains(s, q) {
32 | t.Errorf("Suggestion doesn't contain query (%q): %q", q, s)
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/search/engines/_engines_test/s_web.go:
--------------------------------------------------------------------------------
1 | package _engines_test
2 |
3 | import (
4 | "context"
5 | "strings"
6 | "testing"
7 |
8 | "github.com/hearchco/agent/src/search/result"
9 | "github.com/hearchco/agent/src/search/scraper"
10 | )
11 |
12 | func CheckWebSearch(t *testing.T, e scraper.WebSearcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) {
13 | // TestCaseHasAnyResults
14 | for _, tc := range tchar {
15 | e.InitSearcher(context.Background())
16 |
17 | resChan := make(chan result.ResultScraped, 100)
18 | go e.WebSearch(tc.Query, tc.Options, resChan)
19 |
20 | results := make([]result.ResultScraped, 0)
21 | for r := range resChan {
22 | results = append(results, r)
23 | }
24 |
25 | if len(results) == 0 {
26 | defer t.Errorf("Got no results for %q", tc.Query)
27 | }
28 | }
29 |
30 | // TestCaseContainsResults
31 | for _, tc := range tccr {
32 | e.InitSearcher(context.Background())
33 |
34 | resChan := make(chan result.ResultScraped, 100)
35 | go e.WebSearch(tc.Query, tc.Options, resChan)
36 |
37 | results := make([]result.ResultScraped, 0)
38 | for r := range resChan {
39 | results = append(results, r)
40 | }
41 |
42 | if len(results) == 0 {
43 | defer t.Errorf("Got no results for %q", tc.Query)
44 | } else {
45 | for _, rURL := range tc.ResultURLs {
46 | found := false
47 |
48 | for _, r := range results {
49 | if strings.Contains(r.URL(), rURL) {
50 | found = true
51 | break
52 | }
53 | }
54 |
55 | if !found {
56 | defer t.Errorf("Couldn't find %q (%q).\nThe results: %q", rURL, tc.Query, results)
57 | }
58 | }
59 | }
60 | }
61 |
62 | // TestCaseRankedResults
63 | for _, tc := range tcrr {
64 | e.InitSearcher(context.Background())
65 |
66 | resChan := make(chan result.ResultScraped, 100)
67 | go e.WebSearch(tc.Query, tc.Options, resChan)
68 |
69 | results := make([]result.ResultScraped, 0)
70 | for r := range resChan {
71 | results = append(results, r)
72 | }
73 |
74 | if len(results) == 0 {
75 | defer t.Errorf("Got no results for %q", tc.Query)
76 | } else if len(results) < len(tc.ResultURLs) {
77 | defer t.Errorf("Number of results is less than test case URLs.")
78 | } else {
79 | for i, rURL := range tc.ResultURLs {
80 | if !strings.Contains(results[i].URL(), rURL) {
81 | defer t.Errorf("Wrong result on rank %q: %q (%q).\nThe results: %q", i+1, rURL, tc.Query, results)
82 | }
83 | }
84 | }
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/src/search/engines/_engines_test/structs.go:
--------------------------------------------------------------------------------
1 | package _engines_test
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines/options"
5 | )
6 |
// TestCaseHasAnyResults is a search that only has to return at least one result.
7 | type TestCaseHasAnyResults struct {
8 | Query string
9 | Options options.Options
10 | }
11 |
// TestCaseContainsResults is a search whose results must include every entry
// of ResultURLs (matched as a substring of a result URL), in any position.
12 | type TestCaseContainsResults struct {
13 | Query string
14 | ResultURLs []string
15 | Options options.Options
16 | }
17 |
// TestCaseRankedResults is a search whose i-th result URL must contain the
// i-th entry of ResultURLs.
18 | type TestCaseRankedResults struct {
19 | Query string
20 | ResultURLs []string
21 | Options options.Options
22 | }
23 |
24 | func NewOpts() options.Options {
25 | return options.Options{
26 | Pages: options.Pages{Start: 0, Max: 1},
27 | Locale: options.LocaleDefault,
28 | SafeSearch: false,
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/search/engines/bing/bing.md:
--------------------------------------------------------------------------------
1 | # Bing
2 |
3 | Taking the URL from dom.Find("div.tpcn div.tptxt cite") is unreliable because the URL text shown there may be truncated.
4 |
5 | Telemetry example:
6 | https://www.bing.com/ck/a?!&&p=23fcb82b91411b05JmltdHM9MTY5MTEwNzIwMCZpZ3VpZD0xMTkyOTg3ZC03OWUyLTY1YTgtMWYzOC04YjFlNzg0NTY0NWYmaW5zaWQ9NTI3OQ&ptn=3&hsh=3&fclid=1192987d-79e2-65a8-1f38-8b1e7845645f&u=a1aHR0cHM6Ly93d3cuaW50ZXJuYXRpb25zLm9yZy9tYWdhemluZS90b3AtMTAtaG9iYmllcy15b3UtdmUtbmV2ZXItaGVhcmQtb2YtMzk3ODQ&ntb=1
7 |
8 | goes to:
9 | https://www.internations.org/magazine/top-10-hobbies-you-ve-never-heard-of-39784
10 |
11 | Description fetching could be improved for complicated results.
12 |
13 | `&setlang=en&cc=us` are the UI language and region parameters respectively.
14 |
--------------------------------------------------------------------------------
/src/search/engines/bing/dompaths.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
// dompaths holds the CSS selectors for web results: one for each result
// element and, by field name, ones for its URL, title and description.
// URL and Title use the same selector.
7 | var dompaths = scraper.DOMPaths{
8 | Result: "ol#b_results > li.b_algo",
9 | URL: "h2 > a",
10 | Title: "h2 > a",
11 | Description: "div.b_caption",
12 | }
13 |
// thumbnailDomPaths is a selector for a thumbnail element plus the names of
// the attributes holding its height and width.
14 | type thumbnailDomPaths struct {
15 | Path string
16 | Height string
17 | Width string
18 | }
19 |
// metadataDomPaths is a selector for the metadata element plus the name of
// the attribute to read from it.
20 | type metadataDomPaths struct {
21 | Path string
22 | Attr string
23 | }
24 |
// bingImagesDomPaths groups the selectors used for image results.
25 | type bingImagesDomPaths struct {
26 | Result string
27 | Metadata metadataDomPaths
28 | Title string
29 | ImgFormatStr string
// Three alternative thumbnail locations (see imgDompaths).
30 | Thumbnail [3]thumbnailDomPaths
31 | Source string
32 | }
33 |
// imgDompaths holds the selectors for image results.
34 | var imgDompaths = bingImagesDomPaths{
35 | // aria-live is also a possible attribute for not()
36 | Result: "ul.dgControl_list > li[data-idx] > div.iuscp:not([vrhatt])",
// The "m" attribute of a.iusc — presumably JSON matching imgJsonMetadata; confirm in the scraper.
37 | Metadata: metadataDomPaths{
38 | Path: "a.iusc",
39 | Attr: "m",
40 | },
41 | Title: "div.infnmpt > div > ul > li > a",
42 | ImgFormatStr: "div.imgpt > div > span",
// Alternative thumbnail layouts; the second reads data-height/data-width instead of height/width.
43 | Thumbnail: [...]thumbnailDomPaths{
44 | {
45 | Path: "a.iusc > div > img.mimg",
46 | Height: "height",
47 | Width: "width",
48 | },
49 | {
50 | Path: "a.iusc > div > div > div.mimg > div",
51 | Height: "data-height",
52 | Width: "data-width",
53 | },
54 | {
55 | Path: "a.iusc > div > div > div.mimg > img",
56 | Height: "height",
57 | Width: "width",
58 | },
59 | },
60 | Source: "div.imgpt > div.img_info > div.lnkw > a",
61 | }
62 |
--------------------------------------------------------------------------------
/src/search/engines/bing/info.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// Engine name and the endpoints used for web and image search.
7 | const (
8 | seName = engines.BING
9 | searchURL = "https://www.bing.com/search"
10 | imageSearchURL = "https://www.bing.com/images/async"
11 | )
12 |
// origins is passed to the engine as its Origins in New; it contains only the engine itself.
13 | var origins = [...]engines.Name{seName}
14 |
--------------------------------------------------------------------------------
/src/search/engines/bing/json.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
// imgJsonMetadata maps the JSON keys "purl", "turl", "murl" and "desc" of an
// image result's metadata object onto named fields.
type imgJsonMetadata struct {
	PageURL      string `json:"purl"`
	ThumbnailURL string `json:"turl"`
	ImageURL     string `json:"murl"`
	Desc         string `json:"desc"`
}
9 |
--------------------------------------------------------------------------------
/src/search/engines/bing/new.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/bing/params.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | "github.com/hearchco/agent/src/search/engines/options"
8 | )
9 |
const (
	// Variable params.
	paramQueryK     = "q"
	paramPageK      = "first"
	paramLocaleK    = "setlang" // Should be first 2 characters of Locale.
	paramLocaleSecK = "cc"      // Should be last 2 characters of Locale.
	// paramSafeSearchK = "" // Always enabled.

	// Image variable params (cookie keys).
	imgCookieLocaleK       = "m"   // Receives the part of Locale after "_" (see localeCookieString).
	imgCookieLocaleSecK    = "u"   // Receives the part of Locale before "_" (see localeCookieString).
	imgCookieLocaleAltK    = "mkt" // Receives the part of Locale after "_" (see localeAltCookieString).
	imgCookieLocaleAltSecK = "ui"  // Receives the part of Locale before "_" (see localeAltCookieString).

	// Image constant params.
	imgParamAsyncK, imgParamAsyncV = "async", "1"
	imgParamCountK, imgParamCountV = "count", "35"
)
28 |
29 | func localeParamValues(locale options.Locale) (string, string) {
30 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2)
31 | return spl[0], spl[1]
32 | }
33 |
34 | func localeCookieString(locale options.Locale) string {
35 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2)
36 | return fmt.Sprintf("%v=%v&%v=%v", imgCookieLocaleK, spl[1], imgCookieLocaleSecK, spl[0])
37 | }
38 |
39 | func localeAltCookieString(locale options.Locale) string {
40 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2)
41 | return fmt.Sprintf("%v=%v&%v=%v", imgCookieLocaleAltK, spl[1], imgCookieLocaleAltSecK, spl[0])
42 | }
43 |
--------------------------------------------------------------------------------
/src/search/engines/bing/s_images_test.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestImageSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "wikipedia logo",
22 | ResultURLs: []string{"upload.wikimedia.org"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "linux logo wikipedia",
28 | ResultURLs: []string{"logos-world.net"},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckImageSearch(t, se, tchar[:], tccr[:], tcrr[:])
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/bing/s_web_test.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/bing/telemetry.go:
--------------------------------------------------------------------------------
1 | package bing
2 |
3 | import (
4 | "encoding/base64"
5 | "fmt"
6 | "net/url"
7 | "strings"
8 | )
9 |
// removeTelemetry unwraps a Bing click-tracking link.
//
// URLs that do not start with "https://www.bing.com/ck/a?" are returned
// unchanged. For tracking URLs, the first value of the "u" query parameter is
// taken, its two leading marker characters ("a1") are dropped, and the rest is
// decoded as unpadded URL-safe base64 to obtain the target URL.
//
// An error is returned when the URL cannot be parsed, when the "u" parameter
// is missing or shorter than the two-character marker (previously this
// panicked on the slice expression), or when the payload is not valid base64.
func removeTelemetry(urll string) (string, error) {
	if !strings.HasPrefix(urll, "https://www.bing.com/ck/a?") {
		return urll, nil
	}

	parsedUrl, err := url.Parse(urll)
	if err != nil {
		return "", fmt.Errorf("failed parsing URL: %w", err)
	}

	// Length of the "a1" marker that precedes the base64 payload.
	const markerLen = 2

	// Get the first value of "u" parameter; guard before slicing so a missing
	// or truncated parameter is reported instead of crashing.
	wrapped := parsedUrl.Query().Get("u")
	if len(wrapped) < markerLen {
		return "", fmt.Errorf("missing or too short \"u\" parameter in URL: %q", urll)
	}

	cleanUrl, err := base64.RawURLEncoding.DecodeString(wrapped[markerLen:])
	if err != nil {
		return "", fmt.Errorf("failed decoding base64: %w", err)
	}

	return string(cleanUrl), nil
}
30 |
--------------------------------------------------------------------------------
/src/search/engines/brave/dompaths.go:
--------------------------------------------------------------------------------
1 | package brave
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
// dompaths holds the selectors for a single Brave web result entry and for
// the URL, title and description elements looked up inside that entry.
var dompaths = scraper.DOMPaths{
	Result:      "div.snippet[data-type=\"web\"]",
	URL:         "a",
	Title:       "div.title",
	Description: "div.snippet-description",
}
13 |
--------------------------------------------------------------------------------
/src/search/engines/brave/info.go:
--------------------------------------------------------------------------------
1 | package brave
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// Identity and endpoint of the Brave engine.
const (
	seName    = engines.BRAVE
	searchURL = "https://search.brave.com/search" // Web search endpoint.
)

// origins is sliced into scraper.EngineBase.Origins by New; Brave lists only itself.
var origins = [...]engines.Name{seName}
13 |
--------------------------------------------------------------------------------
/src/search/engines/brave/new.go:
--------------------------------------------------------------------------------
1 | package brave
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/brave/params.go:
--------------------------------------------------------------------------------
1 | package brave
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | "github.com/hearchco/agent/src/search/engines/options"
8 | )
9 |
const (
	// Variable params.
	paramQueryK       = "q"
	paramPageK        = "offset"
	cookieLocaleK     = "country"    // Cookie key; should be last 2 characters of Locale (see localeCookieString).
	cookieSafeSearchK = "safesearch" // Cookie key; can be "off" or "strict" (see safeSearchCookieString).

	// Constant params.
	paramSourceK, paramSourceV         = "source", "web"
	paramSpellcheckK, paramSpellcheckV = "spellcheck", "0"
)
21 |
22 | func localeCookieString(locale options.Locale) string {
23 | region := strings.SplitN(strings.ToLower(locale.String()), "_", 2)[1]
24 | return fmt.Sprintf("%v=%v", cookieLocaleK, region)
25 | }
26 |
27 | func safeSearchCookieString(safesearch bool) string {
28 | if safesearch {
29 | return fmt.Sprintf("%v=%v", cookieSafeSearchK, "strict")
30 | } else {
31 | return fmt.Sprintf("%v=%v", cookieSafeSearchK, "off")
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/search/engines/brave/s_web_test.go:
--------------------------------------------------------------------------------
1 | package brave
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/ddg.md:
--------------------------------------------------------------------------------
1 | # DuckDuckGo
2 |
3 | Send a [POST request](https://github.com/gocolly/colly/issues/175#issuecomment-400024313) to `https://lite.duckduckgo.com/lite/` with body: `q=&dc=`. It will return 20-22 results. GET requests could be used like `https://lite.duckduckgo.com/lite/?q=&dc=`.
4 |
5 | First request could be: col.PostRaw(Info.URL, []byte("q="+query+"&dc=1"))
6 |
7 | This may be useful: http://api.jquery.com/index/
8 |
9 | The href on the title sometimes contains telemetry, and is not a valid URL then. That's why we fetch the scheme from it, and append it to the span text.
10 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/dompaths.go:
--------------------------------------------------------------------------------
1 | package duckduckgo
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
// dompaths holds the selectors for DuckDuckGo Lite results. Unlike engines
// that set Result, it sets ResultsContainer (the table body) together with
// the URL, title and description cell selectors.
var dompaths = scraper.DOMPaths{
	ResultsContainer: "div.filters > table > tbody",
	URL:              "td > a.result-link",
	Title:            "td > a.result-link", // Same anchor as URL.
	Description:      "td.result-snippet",
}
13 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/info.go:
--------------------------------------------------------------------------------
1 | package duckduckgo
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// Identity and endpoints of the DuckDuckGo engine.
const (
	seName     = engines.DUCKDUCKGO
	searchURL  = "https://lite.duckduckgo.com/lite/" // Web search endpoint (Lite frontend).
	suggestURL = "https://duckduckgo.com/ac/"        // Suggestions endpoint used by Suggest.
)

// origins is sliced into scraper.EngineBase.Origins by New; besides
// DuckDuckGo itself it lists Bing.
var origins = [...]engines.Name{seName, engines.BING}
14 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/new.go:
--------------------------------------------------------------------------------
1 | package duckduckgo
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/params.go:
--------------------------------------------------------------------------------
1 | package duckduckgo
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | "github.com/hearchco/agent/src/search/engines/options"
8 | )
9 |
const (
	// Variable params.
	paramQueryK   = "q"
	paramPageK    = "dc"
	cookieLocaleK = "kl" // Cookie key; value is the lowercased Locale with its two halves swapped and "_" replaced by "-" (see localeCookieString).
	// paramSafeSearchK = "" // Always enabled.

	// Suggestions constant params.
	sugParamTypeK, sugParamTypeV = "type", "list"
)
20 |
21 | func localeCookieString(locale options.Locale) string {
22 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2)
23 | return fmt.Sprintf("%v=%v-%v", cookieLocaleK, spl[1], spl[0])
24 | }
25 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/s_suggestions.go:
--------------------------------------------------------------------------------
1 | package duckduckgo
2 |
3 | import (
4 | "sync/atomic"
5 |
6 | "github.com/gocolly/colly/v2"
7 | "github.com/rs/zerolog/log"
8 |
9 | "github.com/hearchco/agent/src/search/engines/options"
10 | "github.com/hearchco/agent/src/search/result"
11 | "github.com/hearchco/agent/src/search/scraper"
12 | "github.com/hearchco/agent/src/utils/anonymize"
13 | "github.com/hearchco/agent/src/utils/moreurls"
14 | )
15 |
16 | func (se Engine) Suggest(query string, options options.Options, sugChan chan result.SuggestionScraped) ([]error, bool) {
17 | foundResults := atomic.Bool{}
18 | retErrors := make([]error, 0, 1)
19 |
20 | se.OnResponse(func(e *colly.Response) {
21 | log.Trace().
22 | Caller().
23 | Bytes("body", e.Body).
24 | Msg("Got response")
25 |
26 | suggs, err := scraper.SuggestRespToSuggestions(e.Body)
27 | if err != nil {
28 | log.Error().
29 | Caller().
30 | Err(err).
31 | Bytes("body", e.Body).
32 | Msg("Failed to convert response to suggestions")
33 | } else {
34 | log.Trace().
35 | Caller().
36 | Str("engine", se.Name.String()).
37 | Strs("suggestions", suggs).
38 | Msg("Sending suggestions to channel")
39 | for i, sug := range suggs {
40 | sugChan <- result.NewSuggestionScraped(sug, se.Name, i+1)
41 | }
42 | if !foundResults.Load() {
43 | foundResults.Store(true)
44 | }
45 | }
46 | })
47 |
48 | ctx := colly.NewContext()
49 |
50 | // Build the parameters.
51 | params := moreurls.NewParams(
52 | paramQueryK, query,
53 | sugParamTypeK, sugParamTypeV,
54 | )
55 |
56 | // Build the url.
57 | urll := moreurls.Build(suggestURL, params)
58 |
59 | // Build anonymous url, by anonymizing the query.
60 | params.Set(paramQueryK, anonymize.String(query))
61 | anonUrll := moreurls.Build(suggestURL, params)
62 |
63 | // Send the request.
64 | if err := se.Get(ctx, urll, anonUrll); err != nil {
65 | retErrors = append(retErrors, err)
66 | }
67 |
68 | se.Wait()
69 | close(sugChan)
70 | return retErrors[:len(retErrors):len(retErrors)], foundResults.Load()
71 | }
72 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/s_suggestions_test.go:
--------------------------------------------------------------------------------
1 | package duckduckgo
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestSuggest(t *testing.T) {
11 | se := New()
12 | se.InitSuggester(context.Background())
13 | _engines_test.CheckSuggest(t, se, "test")
14 | }
15 |
--------------------------------------------------------------------------------
/src/search/engines/duckduckgo/s_web_test.go:
--------------------------------------------------------------------------------
1 | package duckduckgo
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/etools/dompaths.go:
--------------------------------------------------------------------------------
1 | package etools
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
// dompaths holds the selectors for a single Etools result row and for the
// URL, title and description elements looked up inside that row.
var dompaths = scraper.DOMPaths{
	Result:      "table.result > tbody > tr",
	URL:         "td.record > a",
	Title:       "td.record > a", // Same anchor as URL.
	Description: "td.record > div.text",
}
13 |
--------------------------------------------------------------------------------
/src/search/engines/etools/etools.md:
--------------------------------------------------------------------------------
1 | # Etools
2 |
3 | The first page request is a POST request that looks like:
4 | https://www.etools.ch/searchSubmit.do
5 | BODY: query=something&country=web&language=all&token=5d8d98d9a968388eeb4191afa00ca469
6 | Also works without token.
7 |
8 | The requests for subsequent pages are GET requests that look like:
9 | https://www.etools.ch/search.do?page=4
10 | With a session cookie you got from some previous request:
11 | JSESSIONID=147933E3060CF19256C3581D55E7A72A
12 |
13 | You can submit a GET request like:
14 | https://www.etools.ch/search.do?page=4&query=cool+cars
15 | But you need the JSESSIONID cookie for it to work
16 |
17 | It seems that, if performed too fast, the server can accidentally return the same response for different pages. Thus, this package could benefit from some Timings.
18 |
19 |
20 | `?dataSourceResults=20` loads more requests
21 |
22 | Possible settings to apply: `https://www.etools.ch/searchSettings.do`
23 | Interesting are especially: `Results per search engine` and `Results per page`
24 |
25 | Captcha Example:
26 | 
--------------------------------------------------------------------------------
/src/search/engines/etools/info.go:
--------------------------------------------------------------------------------
1 | package etools
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// Identity and endpoints of the Etools engine.
const (
	seName    = engines.ETOOLS
	searchURL = "https://www.etools.ch/searchSubmit.do" // Search submission endpoint.
	pageURL   = "https://www.etools.ch/search.do"       // Endpoint taking a page parameter.
)

// origins is sliced into scraper.EngineBase.Origins by New; besides Etools
// itself it lists the other engines named here.
var origins = [...]engines.Name{seName, engines.BING, engines.BRAVE, engines.DUCKDUCKGO, engines.GOOGLE, engines.MOJEEK, engines.QWANT, engines.YAHOO}
14 |
--------------------------------------------------------------------------------
/src/search/engines/etools/new.go:
--------------------------------------------------------------------------------
1 | package etools
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/etools/params.go:
--------------------------------------------------------------------------------
1 | package etools
2 |
const (
	// Variable params.
	paramQueryK      = "query"
	paramPageK       = "page"
	paramSafeSearchK = "safeSearch" // Can be "true" or "false" (see safeSearchValue).

	// Constant params.
	paramCountryK, paramCountryV   = "country", "web"
	paramLanguageK, paramLanguageV = "language", "all"
)
13 |
// safeSearchValue returns the safe-search parameter value: "true" when
// safesearch is enabled and "false" otherwise.
func safeSearchValue(safesearch bool) string {
	if safesearch {
		return "true"
	}
	return "false"
}
21 |
--------------------------------------------------------------------------------
/src/search/engines/etools/s_web_test.go:
--------------------------------------------------------------------------------
1 | package etools
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/google/dompaths.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
// dompaths holds the selectors for a single Google web result entry and for
// the URL, title and description elements looked up inside that entry.
var dompaths = scraper.DOMPaths{
	Result:      "div.g",
	URL:         "a",
	Title:       "a > h3",
	Description: "div > span",
}
13 |
--------------------------------------------------------------------------------
/src/search/engines/google/info.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// Identity and endpoints of the Google engine.
const (
	seName         = engines.GOOGLE
	searchURL      = "https://www.google.com/search"                     // Web search endpoint.
	imageSearchURL = "https://www.google.com/search"                     // Image search endpoint; same URL as searchURL.
	suggestURL     = "https://suggestqueries.google.com/complete/search" // Suggestions endpoint used by Suggest.
)

// origins is sliced into scraper.EngineBase.Origins by New; Google lists only itself.
var origins = [...]engines.Name{seName}
15 |
--------------------------------------------------------------------------------
/src/search/engines/google/json.go:
--------------------------------------------------------------------------------
1 | package google
2 |
type (
	// imgJsonResponse is the outermost JSON object; only its "ischj" member
	// is decoded.
	imgJsonResponse struct {
		ISCHJ ischj `json:"ischj"`
	}

	// ischj wraps the "metadata" array, one entry per image.
	ischj struct {
		Metadata []metadata `json:"metadata"`
	}

	// metadata is one entry of the "metadata" array: page information, grid
	// text, and the original and thumbnail image descriptors.
	metadata struct {
		Result        jsonResult `json:"result"`
		TextInGrid    textInGrid `json:"text_in_grid"`
		OriginalImage image      `json:"original_image"`
		Thumbnail     image      `json:"thumbnail"`
	}

	// jsonResult carries the referrer URL and the page and site titles.
	jsonResult struct {
		ReferrerUrl string `json:"referrer_url"`
		PageTitle   string `json:"page_title"`
		SiteTitle   string `json:"site_title"`
	}

	// textInGrid carries the "snippet" text.
	textInGrid struct {
		Snippet string `json:"snippet"`
	}

	// image is a URL with integer height and width.
	image struct {
		Url    string `json:"url"`
		Height int    `json:"height"`
		Width  int    `json:"width"`
	}
)
33 |
--------------------------------------------------------------------------------
/src/search/engines/google/new.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/google/params.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/hearchco/agent/src/search/engines/options"
7 | )
8 |
const (
	// Variable params.
	paramQueryK      = "q"
	paramPageK       = "start"
	paramLocaleK     = "hl"   // Should be first 2 characters of Locale.
	paramLocaleSecK  = "lr"   // Should be first 2 characters of Locale with prefixed "lang_".
	paramSafeSearchK = "safe" // Can be "off", "medium" or "high"; safeSearchParamValue only produces "off" and "high".

	// Constant params.
	paramFilterK, paramFilterV = "filter", "0"

	// Image search variable params.
	// The value is a prefix; NOTE(review): presumably the page index is appended after "ijn:" — confirm at the call site.
	imgParamPageK, imgParamPageVPrefix = "async", "_fmt:json,p:1,ijn:"

	// Image search constant params.
	imgParamTbmK, imgParamTbmV         = "tbm", "isch"
	imgParamAsearchK, imgParamAsearchV = "asearch", "isch"

	// Suggestions constant params.
	sugParamClientK, sugParamClientV = "client", "firefox"
)
30 |
31 | func localeParamValues(locale options.Locale) (string, string) {
32 | lang := strings.SplitN(strings.ToLower(locale.String()), "_", 2)[0]
33 | return lang, "lang_" + lang
34 | }
35 |
// safeSearchParamValue returns the safe-search parameter value: "high" when
// safesearch is enabled and "off" otherwise.
func safeSearchParamValue(safesearch bool) string {
	if safesearch {
		return "high"
	}
	return "off"
}
43 |
--------------------------------------------------------------------------------
/src/search/engines/google/s_images_test.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestImageSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "wikipedia logo",
22 | ResultURLs: []string{"upload.wikimedia.org"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "linux logo wikipedia",
28 | ResultURLs: []string{"upload.wikimedia.org"},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckImageSearch(t, se, tchar[:], tccr[:], tcrr[:])
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/google/s_suggestions.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "sync/atomic"
5 |
6 | "github.com/gocolly/colly/v2"
7 | "github.com/rs/zerolog/log"
8 |
9 | "github.com/hearchco/agent/src/search/engines/options"
10 | "github.com/hearchco/agent/src/search/result"
11 | "github.com/hearchco/agent/src/search/scraper"
12 | "github.com/hearchco/agent/src/utils/anonymize"
13 | "github.com/hearchco/agent/src/utils/moreurls"
14 | )
15 |
16 | func (se Engine) Suggest(query string, options options.Options, sugChan chan result.SuggestionScraped) ([]error, bool) {
17 | foundResults := atomic.Bool{}
18 | retErrors := make([]error, 0, 1)
19 |
20 | se.OnResponse(func(e *colly.Response) {
21 | log.Trace().
22 | Caller().
23 | Bytes("body", e.Body).
24 | Msg("Got response")
25 |
26 | suggs, err := scraper.SuggestRespToSuggestions(e.Body)
27 | if err != nil {
28 | log.Error().
29 | Caller().
30 | Err(err).
31 | Bytes("body", e.Body).
32 | Msg("Failed to convert response to suggestions")
33 | } else {
34 | log.Trace().
35 | Caller().
36 | Str("engine", se.Name.String()).
37 | Strs("suggestions", suggs).
38 | Msg("Sending suggestions to channel")
39 | for i, sug := range suggs {
40 | sugChan <- result.NewSuggestionScraped(sug, se.Name, i+1)
41 | }
42 | if !foundResults.Load() {
43 | foundResults.Store(true)
44 | }
45 | }
46 | })
47 |
48 | ctx := colly.NewContext()
49 |
50 | // Build the parameters.
51 | params := moreurls.NewParams(
52 | sugParamClientK, sugParamClientV,
53 | paramQueryK, query,
54 | )
55 |
56 | // Build the url.
57 | urll := moreurls.Build(suggestURL, params)
58 |
59 | // Build anonymous url, by anonymizing the query.
60 | params.Set(paramQueryK, anonymize.String(query))
61 | anonUrll := moreurls.Build(suggestURL, params)
62 |
63 | // Send the request.
64 | if err := se.Get(ctx, urll, anonUrll); err != nil {
65 | retErrors = append(retErrors, err)
66 | }
67 |
68 | se.Wait()
69 | close(sugChan)
70 | return retErrors[:len(retErrors):len(retErrors)], foundResults.Load()
71 | }
72 |
--------------------------------------------------------------------------------
/src/search/engines/google/s_suggestions_test.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestSuggest(t *testing.T) {
11 | se := New()
12 | se.InitSuggester(context.Background())
13 | _engines_test.CheckSuggest(t, se, "test")
14 | }
15 |
--------------------------------------------------------------------------------
/src/search/engines/google/s_web_test.go:
--------------------------------------------------------------------------------
1 | package google
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/googlescholar/dompaths.go:
--------------------------------------------------------------------------------
1 | package googlescholar
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
// dompaths holds the selectors for a single Google Scholar result entry and
// for the URL, title and description elements looked up inside that entry.
var dompaths = scraper.DOMPaths{
	Result:      "div#gs_res_ccl_mid > div.gs_or",
	URL:         "h3 > a",
	Title:       "h3 > a", // Same anchor as URL.
	Description: "div.gs_rs",
}
13 |
--------------------------------------------------------------------------------
/src/search/engines/googlescholar/info.go:
--------------------------------------------------------------------------------
1 | package googlescholar
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// Identity and endpoint of the Google Scholar engine.
const (
	seName    = engines.GOOGLESCHOLAR
	searchURL = "https://scholar.google.com/scholar" // Search endpoint.
)

// origins is sliced into scraper.EngineBase.Origins by New; Google Scholar
// lists only itself.
var origins = [...]engines.Name{seName}
13 |
--------------------------------------------------------------------------------
/src/search/engines/googlescholar/new.go:
--------------------------------------------------------------------------------
1 | package googlescholar
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/googlescholar/params.go:
--------------------------------------------------------------------------------
1 | package googlescholar
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/hearchco/agent/src/search/engines/options"
7 | )
8 |
const (
	// Variable params.
	paramQueryK      = "q"
	paramPageK       = "start"
	paramLocaleK     = "hl"   // Should be first 2 characters of Locale.
	paramLocaleSecK  = "lr"   // Should be first 2 characters of Locale with prefixed "lang_".
	paramSafeSearchK = "safe" // Can be "off", "medium" or "high"; safeSearchParamValue only produces "off" and "high".

	// Constant params.
	paramFilterK, paramFilterV = "filter", "0"
)
20 |
21 | func localeParamValues(locale options.Locale) (string, string) {
22 | lang := strings.SplitN(strings.ToLower(locale.String()), "_", 2)[0]
23 | return lang, "lang_" + lang
24 | }
25 |
// safeSearchParamValue returns the safe-search parameter value: "high" when
// safesearch is enabled and "off" otherwise.
func safeSearchParamValue(safesearch bool) string {
	if safesearch {
		return "high"
	}
	return "off"
}
33 |
--------------------------------------------------------------------------------
/src/search/engines/googlescholar/s_web_test.go:
--------------------------------------------------------------------------------
1 | package googlescholar
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "interaction nets",
22 | ResultURLs: []string{"https://dl.acm.org/doi/pdf/10.1145/96709.96718"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "On building fast kd-trees for ray tracing, and on doing that in O (N log N)",
28 | ResultURLs: []string{"https://ieeexplore.ieee.org/abstract/document/4061547/"},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/googlescholar/telemetry.go:
--------------------------------------------------------------------------------
1 | package googlescholar
2 |
3 | import (
4 | "net/url"
5 | )
6 |
// removeTelemetry drops the "dq", "lr", "oi", "ots" and "sig" query params
// from link and returns the re-encoded URL. If link cannot be parsed it is
// returned unchanged together with the parse error.
func removeTelemetry(link string) (string, error) {
	u, err := url.Parse(link)
	if err != nil {
		return link, err
	}

	params := u.Query()
	params.Del("dq")
	params.Del("lr")
	params.Del("oi")
	params.Del("ots")
	params.Del("sig")

	// Encode writes the remaining params sorted by key.
	u.RawQuery = params.Encode()
	return u.String(), nil
}
22 |
--------------------------------------------------------------------------------
/src/search/engines/mojeek/dompaths.go:
--------------------------------------------------------------------------------
1 | package mojeek
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | var dompaths = scraper.DOMPaths{
8 | Result: "ul.results-standard > li",
9 | URL: "h2 > a.title",
10 | Title: "h2 > a.title",
11 | Description: "p.s",
12 | }
13 |
--------------------------------------------------------------------------------
/src/search/engines/mojeek/info.go:
--------------------------------------------------------------------------------
1 | package mojeek
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
7 | const (
8 | seName = engines.MOJEEK
9 | searchURL = "https://www.mojeek.com/search"
10 | )
11 |
12 | var origins = [...]engines.Name{seName}
13 |
--------------------------------------------------------------------------------
/src/search/engines/mojeek/new.go:
--------------------------------------------------------------------------------
1 | package mojeek
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/mojeek/params.go:
--------------------------------------------------------------------------------
1 | package mojeek
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/hearchco/agent/src/search/engines/options"
7 | )
8 |
// Variable params.
const (
	paramQueryK = "q"
	paramPageK  = "s"
	// First 2 characters of Locale.
	paramLocaleK = "lb"
	// Last 2 characters of Locale.
	paramLocaleSecK = "arc"
	// Can be "0" or "1".
	paramSafeSearchK = "safe"
)
17 |
18 | func localeParamValues(locale options.Locale) (string, string) {
19 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2)
20 | return spl[0], spl[1]
21 | }
22 |
// safeSearchParamValue maps the safe search flag to its param value:
// "1" when enabled, "0" otherwise.
func safeSearchParamValue(safesearch bool) string {
	value := "0"
	if safesearch {
		value = "1"
	}
	return value
}
30 |
--------------------------------------------------------------------------------
/src/search/engines/mojeek/s_web_test.go:
--------------------------------------------------------------------------------
1 | package mojeek
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/name.go:
--------------------------------------------------------------------------------
1 | package engines
2 |
3 | import "strings"
4 |
// Name is the integer enum identifying a search engine.
//
// NOTE(review): the trailing comments on the constants below look like
// comma-separated tags consumed by the enginer generator invoked via
// go:generate — confirm in generate/enginer before editing them.
type Name int

//go:generate enumer -type=Name -json -text
//go:generate go run github.com/hearchco/agent/generate/enginer -type=Name -packagename search -output ../engine_enginer.go
const (
	UNDEFINED     Name = iota
	BING               // enginer,websearcher,imagesearcher
	BRAVE              // enginer,websearcher
	DUCKDUCKGO         // enginer,websearcher,suggester
	ETOOLS             // enginer,websearcher
	GOOGLE             // enginer,websearcher,imagesearcher,suggester
	GOOGLESCHOLAR      // enginer,websearcher
	MOJEEK             // enginer,websearcher
	PRESEARCH          // enginer,websearcher
	QWANT              // enginer,websearcher
	STARTPAGE          // enginer,websearcher
	SWISSCOWS          // enginer,websearcher
	YAHOO              // enginer,websearcher
	YEP                // disabled
)
25 |
26 | // Returns engine names without UNDEFINED.
27 | func Names() []Name {
28 | return _NameValues[1:]
29 | }
30 |
31 | func (n Name) ToLower() string {
32 | return strings.ToLower(n.String())
33 | }
34 |
--------------------------------------------------------------------------------
/src/search/engines/options/locale.go:
--------------------------------------------------------------------------------
1 | package options
2 |
3 | import (
4 | "fmt"
5 | )
6 |
// Locale is a locale code of the form "en_US": two lowercase ASCII letters,
// an underscore and two uppercase ASCII letters.
type Locale string

// LocaleDefault is the default locale, "en_US".
const LocaleDefault Locale = "en_US"

// String returns the locale as a plain string.
func (l Locale) String() string {
	return string(l)
}

// Validate returns an error describing the first format rule l breaks, or
// nil when l is well formed. Rules are checked in this order: non-empty,
// exactly 5 bytes long, lowercase letters first, uppercase letters last,
// underscore in the middle.
func (l Locale) Validate() error {
	switch {
	case l == "":
		return fmt.Errorf("invalid locale: empty")
	case len(l) != 5:
		return fmt.Errorf("invalid locale: isn't 5 characters long")
	case l[0] < 'a' || l[0] > 'z' || l[1] < 'a' || l[1] > 'z':
		return fmt.Errorf("invalid locale: first two characters must be lowercase ASCII letters")
	case l[3] < 'A' || l[3] > 'Z' || l[4] < 'A' || l[4] > 'Z':
		return fmt.Errorf("invalid locale: last two characters must be uppercase ASCII letters")
	case l[2] != '_':
		return fmt.Errorf("invalid locale: third character must be underscore")
	}

	return nil
}

// StringToLocale converts s to a Locale, returning an empty Locale and the
// validation error when s is not well formed.
func StringToLocale(s string) (Locale, error) {
	candidate := Locale(s)
	err := candidate.Validate()
	if err != nil {
		return "", err
	}

	return candidate, nil
}
48 |
--------------------------------------------------------------------------------
/src/search/engines/options/structs.go:
--------------------------------------------------------------------------------
1 | package options
2 |
3 | // User provided options for every search engine.
4 | type Options struct {
5 | Pages Pages
6 | Locale Locale
7 | SafeSearch bool
8 | }
9 |
10 | // Start must be 0-based index.
11 | // Max must be greater than 0.
12 | type Pages struct {
13 | Start int
14 | Max int
15 | }
16 |
--------------------------------------------------------------------------------
/src/search/engines/presearch/info.go:
--------------------------------------------------------------------------------
1 | package presearch
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
7 | const (
8 | seName = engines.PRESEARCH
9 | searchURL = "https://presearch.com/search"
10 | )
11 |
12 | var origins = [...]engines.Name{seName, engines.GOOGLE}
13 |
--------------------------------------------------------------------------------
/src/search/engines/presearch/json.go:
--------------------------------------------------------------------------------
1 | package presearch
2 |
type (
	// jsonResult is one element of the "standardResults" array.
	jsonResult struct {
		Title   string `json:"title"`
		Link    string `json:"link"`
		Desc    string `json:"description"`
		Favicon string `json:"favicon"`
	}

	// jsonResponse mirrors the response body: a "results" object holding the
	// "standardResults" array.
	jsonResponse struct {
		Results struct {
			StandardResults []jsonResult `json:"standardResults"`
		} `json:"results"`
	}
)
15 |
--------------------------------------------------------------------------------
/src/search/engines/presearch/new.go:
--------------------------------------------------------------------------------
1 | package presearch
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/presearch/params.go:
--------------------------------------------------------------------------------
1 | package presearch
2 |
3 | import (
4 | "fmt"
5 | )
6 |
// Variable params.
const (
	paramQueryK = "q"
	paramPageK  = "page"
	// Cookie name for safe search; its value can be "true" or "false".
	cookieSafeSearchK = "use_safe_search"
)

// safeSearchCookieString returns the safe search cookie as a "name=value"
// pair, with the value being "true" or "false".
func safeSearchCookieString(safesearch bool) string {
	return fmt.Sprintf("%v=%t", cookieSafeSearchK, safesearch)
}
21 |
--------------------------------------------------------------------------------
/src/search/engines/presearch/presearch.md:
--------------------------------------------------------------------------------
1 | # Presearch
2 |
3 | It's open source, but there doesn't seem to be any website code: https://github.com/PresearchOfficial
4 |
5 | GET request: https://presearch.com/search?q=something&page=3
6 | Gets populated with API call: GET https://presearch.com/results?id=5b747ca66cc051a82a6c5bbb784a7fa5f802
7 |
8 | There are cookies:
9 | + settings cookies:
10 | + ai_results_disable:1
11 | + use_safe_search:true
12 | + session cookies:
13 | + presearch_session: eyJpdiI6InBtNVgzZE5YZnUvcXRldGNrZytzTWc9PSIsInZh[...]
14 | + XSRF-TOKEN: eyJpdiI6InN5MlM1Z3ovdkJuQzNBcW5MM0x6RkE9PSIsInZhbHVlI[...]
15 | + weird cookies:
16 | + b: 0
17 | + AWSALB: N5A3Uv4njhnPnihhwOzEBPWXwUZCx/KyphsluMdnYHL[...]
18 | + AWSALBCORS: N5A3Uv4njhnPnihhwOzEBPWXwUZCx/KyphsluMdnY[...]
19 |
20 | The id to pass to results is the JS variable "window.searchId", which gets set on the initial GET request; it is generated server-side.
--------------------------------------------------------------------------------
/src/search/engines/presearch/s_web_test.go:
--------------------------------------------------------------------------------
1 | package presearch
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/qwant/info.go:
--------------------------------------------------------------------------------
1 | package qwant
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
7 | const (
8 | seName = engines.QWANT
9 | searchURL = "https://api.qwant.com/v3/search/web"
10 | )
11 |
12 | var origins = [...]engines.Name{seName, engines.BING}
13 |
--------------------------------------------------------------------------------
/src/search/engines/qwant/json.go:
--------------------------------------------------------------------------------
1 | package qwant
2 |
type (
	// jsonResponse mirrors the response body: a status string and the
	// "mainline" groups nested under data.result.items.
	jsonResponse struct {
		Status string `json:"status"`
		Data   struct {
			Res struct {
				Items struct {
					Mainline []jsonMainlineItems `json:"mainline"`
				} `json:"items"`
			} `json:"result"`
		} `json:"data"`
	}

	// jsonMainlineItems is one "mainline" group: a type tag and its items.
	jsonMainlineItems struct {
		Type  string        `json:"type"`
		Items []jsonResults `json:"items"`
	}

	// jsonResults is a single item of a mainline group.
	jsonResults struct {
		Title       string `json:"title"`
		URL         string `json:"url"`
		Description string `json:"desc"`
	}
)
24 |
--------------------------------------------------------------------------------
/src/search/engines/qwant/new.go:
--------------------------------------------------------------------------------
1 | package qwant
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/qwant/params.go:
--------------------------------------------------------------------------------
1 | package qwant
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/hearchco/agent/src/search/engines/options"
7 | "github.com/rs/zerolog/log"
8 | )
9 |
// Variable params.
const (
	paramQueryK = "q"
	paramPageK  = "offset"
	// Same as Locale, only the last two characters are lowered and not
	// everything is supported.
	paramLocaleK = "locale"
	// Can be "0" or "1".
	paramSafeSearchK = "safesearch"
)

// Constant params.
const (
	paramCountK = "count"
	paramCountV = "10"
)
20 |
21 | var validLocales = [...]string{"bg_bg", "br_fr", "ca_ad", "ca_es", "ca_fr", "co_fr", "cs_cz", "cy_gb", "da_dk", "de_at", "de_ch", "de_de", "ec_ca", "el_gr", "en_au", "en_ca", "en_gb", "en_ie", "en_my", "en_nz", "en_us", "es_ad", "es_ar", "es_cl", "es_co", "es_es", "es_mx", "es_pe", "et_ee", "eu_es", "eu_fr", "fc_ca", "fi_fi", "fr_ad", "fr_be", "fr_ca", "fr_ch", "fr_fr", "gd_gb", "he_il", "hu_hu", "it_ch", "it_it", "ko_kr", "nb_no", "nl_be", "nl_nl", "pl_pl", "pt_ad", "pt_pt", "ro_ro", "sv_se", "th_th", "zh_cn", "zh_hk"}
22 |
23 | func localeParamValue(locale options.Locale) string {
24 | l := strings.ToLower(locale.String())
25 | for _, vl := range validLocales {
26 | if l == vl {
27 | return l
28 | }
29 | }
30 |
31 | log.Debug().
32 | Caller().
33 | Str("locale", locale.String()).
34 | Strs("validLocales", validLocales[:]).
35 | Msg("Unsupported locale supplied for this engine, falling back to default")
36 |
37 | return strings.ToLower(options.LocaleDefault.String())
38 | }
39 |
// safeSearchParamValue maps the safe search flag to the value of the
// "safesearch" param: "1" when enabled, "0" when disabled.
//
// The disabled case previously returned "2", which is outside the "0"/"1"
// range documented on paramSafeSearchK.
func safeSearchParamValue(safesearch bool) string {
	if safesearch {
		return "1"
	}
	return "0"
}
47 |
--------------------------------------------------------------------------------
/src/search/engines/qwant/qwant.md:
--------------------------------------------------------------------------------
1 | # Qwant
2 |
3 | We access the API (https://api.qwant.com/v3/search/web) and set the necessary query parameters:
4 |
5 | ```
6 | q:
7 | count: 10
8 | locale: en_GB
9 | offset: 10
10 | device: desktop
11 | safesearch: 1
12 | ```
13 |
14 | To parse the incoming JSON we use https://pkg.go.dev/encoding/json#Unmarshal ([help](https://www.sohamkamani.com/golang/json/)). Especially note:
15 |
16 | > By default, object keys which don't have a corresponding struct field are ignored (see Decoder.DisallowUnknownFields for an alternative).
17 |
18 | We pass data to the colly callbacks like this:
19 |
20 | ```
21 | colCtx := colly.NewContext()
22 | colCtx.Put("offset", strconv.Itoa(i*qResCount))
23 | col.Request("GET", Info.URL, nil, colCtx, nil)
24 | ```
25 |
26 | ^ Instead of colly.Visit(Info.URL)
27 |
28 | For the first result page `col.Visit(Info.URL + query + "&t=web&locale=" + qLocale + "&s=" + qSafeSearch)` could be used. This would emulate an actual user better. Its `.OnHTML` is implemented, but it seems to not play well with the API calls: some results overlap, which doesn't make any sense whatsoever. If this is used for the first page, then `for i := 0; i < opts.Pages.Max; i++ {` needs to start at 1 (i.e. `for i := 1; ....`). When it works and when it doesn't seems random - so it may be best to not touch it. Last query on which it didn't work: `./main --query="jako cudne stvari" --max-pages=2 -vv --visit`
29 |
--------------------------------------------------------------------------------
/src/search/engines/qwant/s_web_test.go:
--------------------------------------------------------------------------------
1 | package qwant
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/startpage/dompaths.go:
--------------------------------------------------------------------------------
1 | package startpage
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | var dompaths = scraper.DOMPaths{
8 | Result: "div.w-gl > div.result",
9 | URL: "a.result-title",
10 | Title: "a.result-title",
11 | Description: "p.description",
12 | }
13 |
--------------------------------------------------------------------------------
/src/search/engines/startpage/info.go:
--------------------------------------------------------------------------------
1 | package startpage
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
7 | const (
8 | seName = engines.STARTPAGE
9 | searchURL = "https://www.startpage.com/sp/search"
10 | )
11 |
12 | var origins = [...]engines.Name{seName, engines.GOOGLE}
13 |
--------------------------------------------------------------------------------
/src/search/engines/startpage/new.go:
--------------------------------------------------------------------------------
1 | package startpage
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/startpage/params.go:
--------------------------------------------------------------------------------
1 | package startpage
2 |
// Variable params.
const (
	paramQueryK = "q"
	paramPageK  = "page"
)

// Constant params.
const (
	// Safe search: can be "none" or an empty param (empty means it's enabled).
	paramSafeSearchK = "qadf"
	paramSafeSearchV = "none"
)
11 |
--------------------------------------------------------------------------------
/src/search/engines/startpage/s_web_test.go:
--------------------------------------------------------------------------------
1 | package startpage
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/startpage/startpage.md:
--------------------------------------------------------------------------------
1 | # Startpage
2 |
3 | First search: POST request to https://www.startpage.com/sp/search
4 | with body: abp=-1&additional=%5Bobject+Object%5D&cat=web&language=english&lui=english&query=some+query&sc=BSuId774jcrp20&sgt=1691175704T0afc510362af195aa4ac76bde15e32e85914a4901124669719eaac0e2c326f15&t=
5 |
6 | Sending just cat,language,lui,query gets this:
7 | 
8 |
9 | Resending the previous request gets this:
10 | 
11 |
12 | Request to second page: POST request to https://www.startpage.com/sp/search
13 | with body: language=english&lui=english&abp=-1&query=some+query&cat=web&page=2&sc=HLlIFdefdQOM20
14 |
15 | Resending it worked fine.
16 |
17 | Changing HLlIFdefdQOM20 to HLlIFdefdZOM20 and resending worked fine. Changing it to aaaaaaaaaaaaaa redirects to an error page, that sends the javascript message. The sc value is plainly set in the html (form#search > input[name="sc"]). When last page is hit:
18 | 
19 |
20 | Doesn't use cookies.
21 |
22 | + Safe search is on: add qadf=heavy to POST body
23 | + Safe search is off: add qadf=none to POST body
24 | - Not sure if it needs to be set with every request
25 |
26 | Disabling javascript in browser settings gets the **Error 883** page. However, sending requests through GET: https://www.startpage.com/sp/search?q= works even if javascript is disabled. The GET request works with no cookies / body. For the page, the `page` URL parameter is used. E.g. https://www.startpage.com/sp/search?q=i+dont+get+it&page=3
27 |
28 |
29 | # Locale
30 | The locale is set with the POST body `qloc` variable and looks something like this:
31 | `JTdCJTIyY2MlMjIlM0ElMjJVUyUyMiUyQyUyMmxvY2F0aW9uJTIyJTNBJTIyVW5pdGVkJTIwU3RhdGVzJTIyJTJDJTIyc3RhdGVfY29kZSUyMiUzQSUyMjAwJTIyJTJDJTIydHlwZSUyMiUzQSUyMmN1c3RvbV9sb2NhdGlvbiUyMiU3RA%3D%3D`\
32 | If we replace `%3D`s with `=`s we can base64 decode it into:
33 | `%7B%22cc%22%3A%22US%22%2C%22location%22%3A%22United%20States%22%2C%22state_code%22%3A%2200%22%2C%22type%22%3A%22custom_location%22%7D`\
34 | Which we can url decode into:
35 | `{"cc":"US","location":"United States","state_code":"00","type":"custom_location"}`
36 | Another decoded example is
37 | `{"cc":"CN","location":"People’s Republic of China","state_code":"00","type":"custom_location"}`
38 | It seems for states, the `state_code` is always `00` and the `type` is always `custom_location`. The `location` parameter may be irrelevant, and spoofing `cc` could be sufficient. However, the results don't seem to change when the region is changed, so it's impossible to test.
39 |
--------------------------------------------------------------------------------
/src/search/engines/swisscows/authenticator.go:
--------------------------------------------------------------------------------
1 | package swisscows
2 |
3 | import (
4 | "fmt"
5 | "math/rand"
6 | "strings"
7 | "time"
8 | "unicode"
9 |
10 | "github.com/hearchco/agent/src/utils/anonymize"
11 | )
12 |
// alphabet is the character set generateNonce draws from: ASCII upper- and
// lowercase letters, digits and "-._~".
const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz" +
	"0123456789" +
	"-._~"
14 |
15 | // Returns nonce and signature.
16 | func generateAuth(params string) (string, string, error) {
17 | paramsWOP := strings.ReplaceAll(params, "+", " ")
18 | nonce := generateNonce(32)
19 |
20 | auth, err := generateSignature(paramsWOP, nonce)
21 | if err != nil {
22 | return "", "", fmt.Errorf("failed to generate auth (nonce and signature): %w", err)
23 | }
24 |
25 | return nonce, auth, nil
26 | }
27 |
28 | func generateNonce(length int) string {
29 | r := rand.New(rand.NewSource(time.Now().UnixNano()))
30 |
31 | nonce := ""
32 | for range length {
33 | randInd := r.Intn(length)
34 | nonce += string(alphabet[randInd])
35 | }
36 |
37 | return nonce
38 | }
39 |
40 | func generateSignature(params string, nonce string) (string, error) {
41 | rot13Nonce := rot13Switch(nonce)
42 | data := "/web/search" + params + rot13Nonce
43 | encData := anonymize.CalculateHashBase64(data)
44 | encData = strings.ReplaceAll(encData, "=", "")
45 | encData = strings.ReplaceAll(encData, "+", "-")
46 | encData = strings.ReplaceAll(encData, "/", "_")
47 |
48 | return encData, nil
49 | }
50 |
51 | func rot13Switch(str string) string {
52 | return switchCapitalization(rot13(str))
53 | }
54 |
// rot13 rotates every ASCII letter of str by 13 places within its own case
// and leaves all other bytes untouched.
//
// It works on a byte copy of the input, so bytes outside the ASCII letters
// (including the bytes of multi-byte UTF-8 characters) pass through
// unchanged. Converting each byte through string(byte), as before,
// re-encoded bytes >= 0x80 as separate runes and corrupted such input.
func rot13(str string) string {
	rotated := []byte(str)
	for i := range rotated {
		rotated[i] = rot13Byte(rotated[i])
	}

	return string(rotated)
}

// rot13Byte rotates b by 13 places if it is an ASCII letter, wrapping within
// 'a'..'z' or 'A'..'Z'; any other byte is returned as is.
func rot13Byte(b byte) byte {
	switch {
	case 'a' <= b && b <= 'z':
		return 'a' + (b-'a'+13)%26
	case 'A' <= b && b <= 'Z':
		return 'A' + (b-'A'+13)%26
	default:
		return b
	}
}
80 |
// switchCapitalization returns str with every uppercase letter lowered and
// every other character passed through unicode.ToUpper (which leaves digits
// and punctuation unchanged).
//
// The string is walked rune by rune, so multi-byte UTF-8 characters are
// handled as whole characters. Converting each byte to a rune, as before,
// split such characters apart. ASCII input gives the same result as before.
// Invalid UTF-8 bytes come out as the Unicode replacement character.
func switchCapitalization(str string) string {
	return strings.Map(func(r rune) rune {
		if unicode.IsUpper(r) {
			return unicode.ToLower(r)
		}
		return unicode.ToUpper(r)
	}, str)
}
94 |
--------------------------------------------------------------------------------
/src/search/engines/swisscows/info.go:
--------------------------------------------------------------------------------
1 | package swisscows
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
7 | const (
8 | seName = engines.SWISSCOWS
9 | searchURL = "https://api.swisscows.com/web/search"
10 | )
11 |
12 | var origins = [...]engines.Name{seName, engines.BING}
13 |
--------------------------------------------------------------------------------
/src/search/engines/swisscows/json.go:
--------------------------------------------------------------------------------
1 | package swisscows
2 |
type (
	// jsonResponse mirrors the response body, which carries an "items" array.
	jsonResponse struct {
		Items []jsonItem `json:"items"`
	}

	// jsonItem is a single element of the "items" array.
	jsonItem struct {
		Id         string `json:"id"`
		Title      string `json:"title"`
		Desc       string `json:"description"`
		URL        string `json:"url"`
		DisplayURL string `json:"displayUrl"`
	}
)
14 |
--------------------------------------------------------------------------------
/src/search/engines/swisscows/new.go:
--------------------------------------------------------------------------------
1 | package swisscows
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/swisscows/params.go:
--------------------------------------------------------------------------------
1 | package swisscows
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/hearchco/agent/src/search/engines/options"
7 | )
8 |
// Variable params.
const (
	paramQueryK = "query"
	paramPageK  = "offset"
	// Same as Locale, only with "_" replaced by "-".
	paramLocaleK = "region"
)

// Constant params.
const (
	paramFreshnessK = "freshness"
	paramFreshnessV = "All"
	paramItemsK     = "itemsCount"
	paramItemsV     = "10"
)
19 |
20 | func localeParamValue(locale options.Locale) string {
21 | return strings.Replace(locale.String(), "_", "-", 1)
22 | }
23 |
--------------------------------------------------------------------------------
/src/search/engines/swisscows/s_web_test.go:
--------------------------------------------------------------------------------
1 | package swisscows
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/yahoo/dompaths.go:
--------------------------------------------------------------------------------
1 | package yahoo
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | var dompaths = scraper.DOMPaths{
8 | Result: "div#main > div > div#web > ol > li > div.algo",
9 | URL: "h3.title > a",
10 | Title: "h3.title > a",
11 | Description: "div > div.compText > p > span",
12 | }
13 |
--------------------------------------------------------------------------------
/src/search/engines/yahoo/info.go:
--------------------------------------------------------------------------------
1 | package yahoo
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
7 | const (
8 | seName = engines.YAHOO
9 | searchURL = "https://search.yahoo.com/search"
10 | )
11 |
12 | var origins = [...]engines.Name{seName, engines.BING}
13 |
--------------------------------------------------------------------------------
/src/search/engines/yahoo/new.go:
--------------------------------------------------------------------------------
1 | package yahoo
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/scraper"
5 | )
6 |
7 | type Engine struct {
8 | scraper.EngineBase
9 | }
10 |
11 | func New() *Engine {
12 | return &Engine{scraper.EngineBase{
13 | Name: seName,
14 | Origins: origins[:],
15 | }}
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/engines/yahoo/params.go:
--------------------------------------------------------------------------------
1 | package yahoo
2 |
3 | import (
4 | "fmt"
5 | )
6 |
// Variable params.
const (
	paramQueryK = "p"
	paramPageK  = "b"
	// Safe search key inside the cookie; can be "p" (disabled) or "r" (enabled).
	cookieSafeSearchK = "vm"
)

// Constant params.
const (
	cookieSafeSearchPrefix = "sB=v=1&pn=10&rw=new&userset=0"
	// paramSbK, paramSbV           = "sB", "v=1"
	// paramPnK, paramPnV           = "pn", "10"
	// paramRwK, paramRwV           = "rw", "new"
	// paramUsersetK, paramUsersetV = "userset", "0"
)

// safeSearchCookieString returns the constant cookie prefix followed by the
// safe search pair: "vm=r" when enabled, "vm=p" otherwise.
func safeSearchCookieString(safesearch bool) string {
	mode := "p"
	if safesearch {
		mode = "r"
	}
	return fmt.Sprintf("%v&%v=%v", cookieSafeSearchPrefix, cookieSafeSearchK, mode)
}
28 |
--------------------------------------------------------------------------------
/src/search/engines/yahoo/s_web_test.go:
--------------------------------------------------------------------------------
1 | package yahoo
2 |
3 | import (
4 | "context"
5 | "testing"
6 |
7 | "github.com/hearchco/agent/src/search/engines/_engines_test"
8 | )
9 |
10 | func TestWebSearch(t *testing.T) {
11 | // Testing options.
12 | opt := _engines_test.NewOpts()
13 |
14 | // Test cases.
15 | tchar := []_engines_test.TestCaseHasAnyResults{{
16 | Query: "ping",
17 | Options: opt,
18 | }}
19 |
20 | tccr := []_engines_test.TestCaseContainsResults{{
21 | Query: "facebook",
22 | ResultURLs: []string{"facebook.com"},
23 | Options: opt,
24 | }}
25 |
26 | tcrr := []_engines_test.TestCaseRankedResults{{
27 | Query: "wikipedia",
28 | ResultURLs: []string{"wikipedia."},
29 | Options: opt,
30 | }}
31 |
32 | se := New()
33 | se.InitSearcher(context.Background())
34 |
35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
36 | }
37 |
--------------------------------------------------------------------------------
/src/search/engines/yahoo/telemetry.go:
--------------------------------------------------------------------------------
1 | package yahoo
2 |
import (
	"fmt"
	"net/url"
	"strings"
)
7 |
// removeTelemetry unwraps a Yahoo redirect link and returns the target it
// points to.
//
// URLs that do not contain "://r.search.yahoo.com/" are returned unchanged.
// For redirect links, the escaped target is the text between "/RU=" and the
// following "/RK=" (or the end of the string when "/RK=" is absent); it is
// query-unescaped before being returned.
//
// An error is returned when a redirect link carries no "/RU=http" segment or
// when the target cannot be unescaped.
func removeTelemetry(urll string) (string, error) {
	if !strings.Contains(urll, "://r.search.yahoo.com/") {
		return urll, nil
	}

	// Matching on "/RU=http" covers both http and https targets; the "http"
	// consumed by the separator is put back below.
	_, rest, found := strings.Cut(urll, "/RU=http")
	if !found {
		// Indexing the split result unconditionally would panic here.
		return "", fmt.Errorf("yahoo redirect URL %q has no /RU= target", urll)
	}
	escaped, _, _ := strings.Cut(rest, "/RK=")

	target, err := url.QueryUnescape("http" + escaped)
	if err != nil {
		return "", fmt.Errorf("unescaping yahoo redirect target: %w", err)
	}

	return target, nil
}
23 |
--------------------------------------------------------------------------------
/src/search/engines/yep/info.go:
--------------------------------------------------------------------------------
1 | package yep
2 |
3 | // import (
4 | // "github.com/hearchco/agent/src/search/engines"
5 | // )
6 |
7 | // const (
8 | // seName = engines.YEP
9 | // searchURL = "https://api.yep.com/fs/2/search"
10 | // )
11 |
12 | // var origins = [...]engines.Name{seName}
13 |
--------------------------------------------------------------------------------
/src/search/engines/yep/json.go:
--------------------------------------------------------------------------------
1 | package yep
2 |
3 | // type jsonResponse struct {
4 | // Results []jsonResult `json:"results"`
5 | // }
6 |
7 | // type jsonResult struct {
8 | // URL string `json:"url"`
9 | // Title string `json:"title"`
10 | // TType string `json:"type"`
11 | // Snippet string `json:"snippet"`
12 | // }
13 |
--------------------------------------------------------------------------------
/src/search/engines/yep/new.go:
--------------------------------------------------------------------------------
1 | package yep
2 |
3 | // import (
4 | // "github.com/hearchco/agent/src/search/scraper"
5 | // )
6 |
7 | // type Engine struct {
8 | // scraper.EngineBase
9 | // }
10 |
11 | // func New() *Engine {
12 | // return &Engine{scraper.EngineBase{
13 | // Name: seName,
14 | // Origins: origins[:],
15 | // }}
16 | // }
17 |
--------------------------------------------------------------------------------
/src/search/engines/yep/params.go:
--------------------------------------------------------------------------------
1 | package yep
2 |
3 | // import (
4 | // "fmt"
5 | // "strings"
6 |
7 | // "github.com/hearchco/agent/src/search/engines/options"
8 | // )
9 |
10 | // const (
11 | // paramKeyPage = "limit"
12 | // paramKeyLocale = "gl" // Should be last 2 characters of Locale.
13 | // paramKeySafeSearch = "safeSearch" // Can be "off" or "strict".
14 |
15 | // paramClient = "client=web"
16 | // paramNo_correct = "no_correct=false"
17 | // paramType = "type=web"
18 | // )
19 |
20 | // func localeParamString(locale options.Locale) string {
21 | // country := strings.Split(locale.String(), "_")[1]
22 | // return fmt.Sprintf("%v=%v", paramKeyLocale, country)
23 | // }
24 |
25 | // func safeSearchParamString(safesearch bool) string {
26 | // if safesearch {
27 | // return fmt.Sprintf("%v=%v", paramKeySafeSearch, "strict")
28 | // } else {
29 | // return fmt.Sprintf("%v=%v", paramKeySafeSearch, "off")
30 | // }
31 | // }
32 |
--------------------------------------------------------------------------------
/src/search/engines/yep/s_web_test.go:
--------------------------------------------------------------------------------
1 | package yep
2 |
3 | // import (
4 | // "context"
5 | // "testing"
6 |
7 | // "github.com/hearchco/agent/src/search/category"
8 | // "github.com/hearchco/agent/src/search/engines/_engines_test"
9 | // )
10 |
11 | // func TestWebSearch(t *testing.T) {
12 | // // Testing options.
13 | // conf := _engines_test.NewConfig(seName)
14 | // opt := _engines_test.NewOpts()
15 |
16 | // // Test cases.
17 | // tchar := []_engines_test.TestCaseHasAnyResults{{
18 | // Query: "ping",
19 | // Options: opt,
20 | // }}
21 |
22 | // tccr := []_engines_test.TestCaseContainsResults{{
23 | // Query: "youtube",
24 | // ResultURLs: []string{"youtube.com"},
25 | // Options: opt,
26 | // }}
27 |
28 | // tcrr := []_engines_test.TestCaseRankedResults{{
29 | // Query: "wikipedia",
30 | // ResultURLs: []string{"wikipedia."},
31 | // Options: opt,
32 | // }}
33 |
34 | // se := New()
35 | // se.Init(context.Background())
36 |
37 | // _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr)
38 | // }
39 |
--------------------------------------------------------------------------------
/src/search/groups.go:
--------------------------------------------------------------------------------
1 | package search
2 |
// Group name constants.
// NOTE(review): presumably these label the engine groups used while running a search — confirm against the callers.
const (
	groupRequired          = "required"
	groupRequiredByOrigin  = "required_by_origin"
	groupPreferred         = "preferred"
	groupPreferredByOrigin = "preferred_by_origin"
)
7 |
--------------------------------------------------------------------------------
/src/search/init.go:
--------------------------------------------------------------------------------
1 | package search
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/hearchco/agent/src/search/engines"
7 | "github.com/hearchco/agent/src/search/scraper"
8 | )
9 |
10 | // Initialize web searchers.
11 | func initializeWebSearchers(ctx context.Context, engs []engines.Name) []scraper.WebSearcher {
12 | searchers := webSearcherArray()
13 | for _, engName := range engs {
14 | searchers[engName].InitSearcher(ctx)
15 | }
16 | return searchers[:]
17 | }
18 |
19 | // Initialize image searchers.
20 | func initializeImageSearchers(ctx context.Context, engs []engines.Name) []scraper.ImageSearcher {
21 | searchers := imageSearcherArray()
22 | for _, engName := range engs {
23 | searchers[engName].InitSearcher(ctx)
24 | }
25 | return searchers[:]
26 | }
27 |
28 | // Initialize suggesters.
29 | func initializeSuggesters(ctx context.Context, engs []engines.Name) []scraper.Suggester {
30 | suggesters := suggesterArray()
31 | for _, engName := range engs {
32 | suggesters[engName].InitSuggester(ctx)
33 | }
34 | return suggesters[:]
35 | }
36 |
--------------------------------------------------------------------------------
/src/search/once.go:
--------------------------------------------------------------------------------
1 | package search
2 |
3 | import (
4 | "sync"
5 | "sync/atomic"
6 |
7 | "github.com/hearchco/agent/src/search/engines"
8 | )
9 |
// onceWrapper pairs a sync.Once with two atomic flags: errored and scraped.
// Success reports true only when scraped is set and errored is not.
type onceWrapper struct {
	once    *sync.Once
	errored atomic.Bool
	scraped atomic.Bool
}
15 |
16 | func initOnceWrapper(engs []engines.Name) map[engines.Name]*onceWrapper {
17 | onceWrapMap := make(map[engines.Name]*onceWrapper, len(engs))
18 | for _, eng := range engs {
19 | onceWrapMap[eng] = &onceWrapper{
20 | once: &sync.Once{},
21 | errored: atomic.Bool{},
22 | scraped: atomic.Bool{},
23 | }
24 | }
25 | return onceWrapMap
26 | }
27 |
// Do forwards f to the wrapped sync.Once.
func (ow *onceWrapper) Do(f func()) {
	ow.once.Do(f)
}
31 |
32 | func (ow *onceWrapper) Errored() {
33 | if !ow.errored.Load() {
34 | ow.errored.Store(true)
35 | }
36 | }
37 |
38 | func (ow *onceWrapper) Scraped() {
39 | if !ow.scraped.Load() {
40 | ow.scraped.Store(true)
41 | }
42 | }
43 |
44 | func (ow *onceWrapper) Success() bool {
45 | return !ow.errored.Load() && ow.scraped.Load()
46 | }
47 |
--------------------------------------------------------------------------------
/src/search/params.go:
--------------------------------------------------------------------------------
1 | package search
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/hearchco/agent/src/search/engines/options"
7 | )
8 |
9 | func validateParams(query string, opts options.Options) error {
10 | if query == "" {
11 | return fmt.Errorf("query can't be empty")
12 | }
13 | if opts.Locale == "" {
14 | return fmt.Errorf("locale can't be empty")
15 | }
16 | if opts.Pages.Start < 0 {
17 | return fmt.Errorf("pages start can't be negative")
18 | }
19 | if opts.Pages.Max < 1 {
20 | return fmt.Errorf("pages max can't be less than 1")
21 | }
22 |
23 | return nil
24 | }
25 |
--------------------------------------------------------------------------------
/src/search/receiver.go:
--------------------------------------------------------------------------------
1 | package search
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/hearchco/agent/src/search/result"
7 | )
8 |
9 | func createReceiver[T any](wg *sync.WaitGroup, valChan chan T, concMap result.ConcMapper[T]) {
10 | // Signal that the receiver is done.
11 | defer wg.Done()
12 |
13 | for recVal := range valChan {
14 | concMap.AddOrUpgrade(recVal)
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/search/result/construct.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "fmt"
5 | "net/url"
6 |
7 | "github.com/hearchco/agent/src/search/engines"
8 | )
9 |
10 | func ConstructResult(seName engines.Name, urll string, title string, description string, page int, onPageRank int) (WebScraped, error) {
11 | if urll == "" {
12 | return WebScraped{}, fmt.Errorf("invalid URL: empty")
13 | }
14 |
15 | u, err := url.Parse(urll)
16 | if err != nil {
17 | return WebScraped{}, fmt.Errorf("invalid URL: %s", err)
18 | }
19 |
20 | if u.Hostname() == "" {
21 | return WebScraped{}, fmt.Errorf("invalid URL: no hostname")
22 | }
23 |
24 | if title == "" {
25 | return WebScraped{}, fmt.Errorf("invalid title: empty")
26 | }
27 |
28 | if page <= 0 {
29 | return WebScraped{}, fmt.Errorf("invalid page: %d", page)
30 | }
31 |
32 | if onPageRank <= 0 {
33 | return WebScraped{}, fmt.Errorf("invalid onPageRank: %d", onPageRank)
34 | }
35 |
36 | return WebScraped{
37 | url: u.String(),
38 | title: title,
39 | description: description,
40 | rank: RankScraped{
41 | RankSimpleScraped{
42 | searchEngine: seName,
43 | rank: 0, // This gets calculated when ranking the results.
44 | },
45 | page,
46 | onPageRank,
47 | },
48 | }, nil
49 | }
50 |
51 | func ConstructImagesResult(
52 | seName engines.Name, urll string, title string, description string, page int, onPageRank int,
53 | originalHeight int, originalWidth int, thumbnailHeight int, thumbnailWidth int,
54 | thumbnailUrl string, sourceName string, sourceUrl string,
55 | ) (ImagesScraped, error) {
56 | res, err := ConstructResult(seName, urll, title, description, page, onPageRank)
57 | if err != nil {
58 | return ImagesScraped{}, err
59 | }
60 |
61 | if originalHeight <= 0 {
62 | return ImagesScraped{}, fmt.Errorf("invalid originalHeight: %d", originalHeight)
63 | }
64 |
65 | if originalWidth <= 0 {
66 | return ImagesScraped{}, fmt.Errorf("invalid originalWidth: %d", originalWidth)
67 | }
68 |
69 | if thumbnailHeight <= 0 {
70 | return ImagesScraped{}, fmt.Errorf("invalid thumbnailHeight: %d", thumbnailHeight)
71 | }
72 |
73 | if thumbnailWidth <= 0 {
74 | return ImagesScraped{}, fmt.Errorf("invalid thumbnailWidth: %d", thumbnailWidth)
75 | }
76 |
77 | if thumbnailUrl == "" {
78 | return ImagesScraped{}, fmt.Errorf("invalid thumbnailUrl: empty")
79 | }
80 |
81 | if sourceUrl == "" {
82 | return ImagesScraped{}, fmt.Errorf("invalid sourceUrl: empty")
83 | }
84 |
85 | return ImagesScraped{
86 | WebScraped: res,
87 |
88 | originalSize: scrapedImageFormat{
89 | height: originalHeight,
90 | width: originalWidth,
91 | },
92 | thumbnailSize: scrapedImageFormat{
93 | height: thumbnailHeight,
94 | width: thumbnailWidth,
95 | },
96 | thumbnailURL: thumbnailUrl,
97 | sourceName: sourceName,
98 | sourceURL: sourceUrl,
99 | }, nil
100 | }
101 |
--------------------------------------------------------------------------------
/src/search/result/interfaces.go:
--------------------------------------------------------------------------------
1 | package result
2 |
// Result is the interface returned by the Convert methods of scraped results
// (for example *Web and *Images). It exposes the result's key, URL, FQDN,
// title and description, its rank and score with their setters, the
// per-engine ranks with helpers to initialize, shrink and append to them,
// conversion to the output form, and Shorten.
// NOTE(review): the two int arguments of Shorten are not described in this file — confirm against the implementations.
type Result interface {
	Key() string
	URL() string
	FQDN() string
	Title() string
	Description() string
	SetDescription(string)
	Rank() int
	SetRank(int)
	Score() float64
	SetScore(float64)
	EngineRanks() []Rank
	InitEngineRanks()
	ShrinkEngineRanks()
	AppendEngineRanks(Rank)
	ConvertToOutput(string) ResultOutput
	Shorten(int, int) Result
}
21 |
// ResultScraped is the interface of a freshly scraped result: it exposes the
// key, URL, title, description and scraped rank, and Convert turns it into a
// Result. WebScraped and ImagesScraped provide these methods.
type ResultScraped interface {
	Key() string
	URL() string
	Title() string
	Description() string
	Rank() RankScraped
	Convert(int) Result
}
30 |
// ConcMapper accepts values of type T through AddOrUpgrade.
// NOTE(review): the name suggests a concurrency-safe map that inserts a new entry or upgrades an existing one — confirm against the implementations.
type ConcMapper[T any] interface {
	AddOrUpgrade(T)
}
34 |
--------------------------------------------------------------------------------
/src/search/result/output.go:
--------------------------------------------------------------------------------
1 | package result
2 |
// ResultOutput is the output form of a result; it is declared as any, so the
// concrete type depends on the result's ConvertToOutput implementation.
type ResultOutput any
4 |
5 | func ConvertToOutput(results []Result, secret string) []ResultOutput {
6 | var output = make([]ResultOutput, 0, len(results))
7 | for _, r := range results {
8 | output = append(output, r.ConvertToOutput(secret))
9 | }
10 | return output
11 | }
12 |
13 | func ConvertSuggestionsToOutput(suggestions []Suggestion) []string {
14 | var output = make([]string, 0, len(suggestions))
15 | for _, s := range suggestions {
16 | output = append(output, s.Value())
17 | }
18 | return output
19 | }
20 |
--------------------------------------------------------------------------------
/src/search/result/r_images.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/rs/zerolog/log"
7 |
8 | "github.com/hearchco/agent/src/utils/anonymize"
9 | )
10 |
// Images is an image result; its data lives in the embedded imagesJSON.
type Images struct {
	imagesJSON
}
14 |
// imagesJSON carries the JSON-tagged fields of an image result: the embedded
// Web result plus the original and thumbnail sizes, the thumbnail URL, and
// the source name and URL.
type imagesJSON struct {
	Web

	OriginalSize  ImageFormat `json:"original"`
	ThumbnailSize ImageFormat `json:"thumbnail"`
	ThumbnailURL  string      `json:"thumbnail_url"`
	SourceName    string      `json:"source"`
	SourceURL     string      `json:"source_url"`
}
24 |
// ImageFormat holds the height and width of an image.
type ImageFormat struct {
	Height int `json:"height"`
	Width  int `json:"width"`
}
29 |
30 | func (r Images) OriginalSize() ImageFormat {
31 | if r.imagesJSON.OriginalSize.Height == 0 || r.imagesJSON.OriginalSize.Width == 0 {
32 | log.Panic().
33 | Int("height", r.imagesJSON.OriginalSize.Height).
34 | Int("width", r.imagesJSON.OriginalSize.Width).
35 | Msg("OriginalSize is zero")
36 | // ^PANIC - Assert because the OriginalSize should never be zero.
37 | }
38 |
39 | return r.imagesJSON.OriginalSize
40 | }
41 |
42 | func (r Images) ThumbnailSize() ImageFormat {
43 | if r.imagesJSON.ThumbnailSize.Height == 0 || r.imagesJSON.ThumbnailSize.Width == 0 {
44 | log.Panic().
45 | Int("height", r.imagesJSON.ThumbnailSize.Height).
46 | Int("width", r.imagesJSON.ThumbnailSize.Width).
47 | Msg("ThumbnailSize is zero")
48 | // ^PANIC - Assert because the ThumbnailSize should never be zero.
49 | }
50 |
51 | return r.imagesJSON.ThumbnailSize
52 | }
53 |
54 | func (r Images) ThumbnailURL() string {
55 | if r.imagesJSON.ThumbnailURL == "" {
56 | log.Panic().Msg("ThumbnailURL is empty")
57 | // ^PANIC - Assert because the ThumbnailURL should never be empty.
58 | }
59 |
60 | return r.imagesJSON.ThumbnailURL
61 | }
62 |
// SourceName returns the stored source name without any validation.
func (r Images) SourceName() string {
	return r.imagesJSON.SourceName
}
66 |
// SourceURL returns the stored source URL without any validation.
func (r Images) SourceURL() string {
	return r.imagesJSON.SourceURL
}
70 |
71 | func (r Images) ConvertToOutput(secret string) ResultOutput {
72 | nowT := time.Now()
73 | fqdnHash, fqdnTimestamp := anonymize.CalculateHMACBase64(r.FQDN(), secret, nowT)
74 | urlHash, urlTimestamp := anonymize.CalculateHMACBase64(r.URL(), secret, nowT)
75 | thmbHash, thmbTimestamp := anonymize.CalculateHMACBase64(r.ThumbnailURL(), secret, nowT)
76 |
77 | return ImagesOutput{
78 | imagesOutputJSON{
79 | r,
80 | fqdnHash,
81 | fqdnTimestamp,
82 | urlHash,
83 | urlTimestamp,
84 | thmbHash,
85 | thmbTimestamp,
86 | },
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/src/search/result/r_images_output.go:
--------------------------------------------------------------------------------
1 | package result
2 |
// ImagesOutput is the output form of an image result; its data lives in the
// embedded imagesOutputJSON.
type ImagesOutput struct {
	imagesOutputJSON
}
6 |
// imagesOutputJSON extends an Images result with hash/timestamp pairs for the
// FQDN, the URL and the thumbnail URL. Every added field is omitted from the
// JSON when empty.
type imagesOutputJSON struct {
	Images

	FqdnHash                  string `json:"fqdn_hash,omitempty"`
	FqdnHashTimestamp         string `json:"fqdn_hash_timestamp,omitempty"`
	URLHash                   string `json:"url_hash,omitempty"`
	URLHashTimestamp          string `json:"url_hash_timestamp,omitempty"`
	ThumbnailURLHash          string `json:"thumbnail_url_hash,omitempty"`
	ThumbnailURLHashTimestamp string `json:"thumbnail_url_hash_timestamp,omitempty"`
}
17 |
--------------------------------------------------------------------------------
/src/search/result/r_images_scraped.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/hearchco/agent/src/utils/moreurls"
5 | "github.com/rs/zerolog/log"
6 | )
7 |
// ImagesScraped is a scraped image result: the embedded WebScraped plus the
// original and thumbnail sizes, the thumbnail URL, and the source name and
// URL.
type ImagesScraped struct {
	WebScraped

	originalSize  scrapedImageFormat
	thumbnailSize scrapedImageFormat
	thumbnailURL  string
	sourceName    string
	sourceURL     string
}
17 |
18 | func (r ImagesScraped) OriginalSize() scrapedImageFormat {
19 | if r.originalSize.height == 0 || r.originalSize.width == 0 {
20 | log.Panic().
21 | Int("height", r.originalSize.height).
22 | Int("width", r.originalSize.width).
23 | Msg("OriginalSize is zero")
24 | // ^PANIC - Assert because the OriginalSize should never be zero.
25 | }
26 |
27 | return r.originalSize
28 | }
29 |
30 | func (r ImagesScraped) ThumbnailSize() scrapedImageFormat {
31 | if r.thumbnailSize.height == 0 || r.thumbnailSize.width == 0 {
32 | log.Panic().
33 | Int("height", r.thumbnailSize.height).
34 | Int("width", r.thumbnailSize.width).
35 | Msg("ThumbnailSize is zero")
36 | // ^PANIC - Assert because the ThumbnailSize should never be zero.
37 | }
38 |
39 | return r.thumbnailSize
40 | }
41 |
42 | func (r ImagesScraped) ThumbnailURL() string {
43 | if r.thumbnailURL == "" {
44 | log.Panic().Msg("ThumbnailURL is empty")
45 | // ^PANIC - Assert because the ThumbnailURL should never be empty.
46 | }
47 |
48 | return r.thumbnailURL
49 | }
50 |
// SourceName returns the scraped source name without any validation.
func (r ImagesScraped) SourceName() string {
	return r.sourceName
}
54 |
// SourceURL returns the scraped source URL without any validation.
func (r ImagesScraped) SourceURL() string {
	return r.sourceURL
}
58 |
59 | func (r ImagesScraped) Convert(erCap int) Result {
60 | engineRanks := make([]Rank, 0, erCap)
61 | engineRanks = append(engineRanks, r.Rank().Convert())
62 | return &Images{
63 | imagesJSON{
64 | Web{
65 | webJSON{
66 | URL: r.URL(),
67 | FQDN: moreurls.FQDN(r.URL()),
68 | Title: r.Title(),
69 | Description: r.Description(),
70 | EngineRanks: engineRanks,
71 | },
72 | },
73 | r.OriginalSize().Convert(),
74 | r.ThumbnailSize().Convert(),
75 | r.ThumbnailURL(),
76 | r.SourceName(),
77 | r.SourceURL(),
78 | },
79 | }
80 | }
81 |
// scrapedImageFormat holds the height and width of a scraped image.
type scrapedImageFormat struct {
	height int
	width  int
}
86 |
87 | func (i scrapedImageFormat) GetHeight() int {
88 | if i.height == 0 {
89 | log.Panic().Msg("Height is zero")
90 | // ^PANIC - Assert because the Height should never be zero.
91 | }
92 |
93 | return i.height
94 | }
95 |
96 | func (i scrapedImageFormat) GetWidth() int {
97 | if i.width == 0 {
98 | log.Panic().Msg("Width is zero")
99 | // ^PANIC - Assert because the Width should never be zero.
100 | }
101 |
102 | return i.width
103 | }
104 |
105 | func (i scrapedImageFormat) Convert() ImageFormat {
106 | return ImageFormat{
107 | Height: i.height,
108 | Width: i.width,
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/src/search/result/r_suggestion.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/rs/zerolog/log"
5 | )
6 |
// Suggestion is a suggestion result; its data lives in the embedded
// suggestionJSON.
type Suggestion struct {
	suggestionJSON
}
10 |
// suggestionJSON carries the JSON-tagged fields of a suggestion: its value,
// rank, score and the ranks given by individual engines.
type suggestionJSON struct {
	Value       string       `json:"value"`
	Rank        int          `json:"rank"`
	Score       float64      `json:"score"`
	EngineRanks []RankSimple `json:"engine_ranks"`
}
17 |
// Value returns the suggestion's value.
func (s Suggestion) Value() string {
	return s.suggestionJSON.Value
}
21 |
// Rank returns the suggestion's rank.
func (s Suggestion) Rank() int {
	return s.suggestionJSON.Rank
}
25 |
// SetRank stores rank as the suggestion's rank.
func (s *Suggestion) SetRank(rank int) {
	s.suggestionJSON.Rank = rank
}
29 |
// Score returns the suggestion's score.
func (s Suggestion) Score() float64 {
	return s.suggestionJSON.Score
}
33 |
// SetScore stores score as the suggestion's score.
func (s *Suggestion) SetScore(score float64) {
	s.suggestionJSON.Score = score
}
37 |
38 | func (s Suggestion) EngineRanks() []RankSimple {
39 | if s.suggestionJSON.EngineRanks == nil {
40 | log.Panic().Msg("EngineRanks is nil")
41 | // ^PANIC - Assert because the EngineRanks should never be nil.
42 | }
43 |
44 | return s.suggestionJSON.EngineRanks
45 | }
46 |
47 | func (s *Suggestion) ShrinkEngineRanks() {
48 | if s.suggestionJSON.EngineRanks == nil {
49 | log.Panic().Msg("EngineRanks is nil")
50 | // ^PANIC - Assert because the EngineRanks should never be nil.
51 | }
52 |
53 | ranksLen := len(s.suggestionJSON.EngineRanks)
54 | s.suggestionJSON.EngineRanks = s.suggestionJSON.EngineRanks[:ranksLen:ranksLen]
55 | }
56 |
57 | func (s *Suggestion) AppendEngineRanks(rank RankSimple) {
58 | if s.suggestionJSON.EngineRanks == nil {
59 | log.Panic().Msg("EngineRanks is nil")
60 | // ^PANIC - Assert because the EngineRanks should never be nil.
61 | }
62 |
63 | s.suggestionJSON.EngineRanks = append(s.suggestionJSON.EngineRanks, rank)
64 | }
65 |
--------------------------------------------------------------------------------
/src/search/result/r_suggestion_scraped.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// SuggestionScraped is a scraped suggestion: its value and the rank it was
// given by the engine that returned it.
type SuggestionScraped struct {
	value string
	rank  RankSimpleScraped
}
11 |
12 | func NewSuggestionScraped(value string, seName engines.Name, rank int) SuggestionScraped {
13 | r := NewRankSimpleScraped(seName, rank)
14 | return SuggestionScraped{
15 | value,
16 | r,
17 | }
18 | }
19 |
// Key returns the suggestion's key, which is its value.
func (s SuggestionScraped) Key() string {
	return s.Value()
}
23 |
// Value returns the scraped suggestion's value.
func (s SuggestionScraped) Value() string {
	return s.value
}
27 |
// Rank returns the scraped suggestion's rank.
func (s SuggestionScraped) Rank() RankSimpleScraped {
	return s.rank
}
31 |
32 | func (s SuggestionScraped) Convert(erCap int) Suggestion {
33 | engineRanks := make([]RankSimple, 0, erCap)
34 | engineRanks = append(engineRanks, s.Rank().Convert())
35 | return Suggestion{
36 | suggestionJSON{
37 | Value: s.Value(),
38 | EngineRanks: engineRanks,
39 | },
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/search/result/r_web_output.go:
--------------------------------------------------------------------------------
1 | package result
2 |
// WebOutput is the output form of a web result; its data lives in the
// embedded webOutputJSON.
type WebOutput struct {
	webOutputJSON
}
6 |
// webOutputJSON extends a Web result with a hash/timestamp pair for the FQDN.
// Both added fields are omitted from the JSON when empty.
type webOutputJSON struct {
	Web

	FqdnHash          string `json:"fqdn_hash,omitempty"`
	FqdnHashTimestamp string `json:"fqdn_hash_timestamp,omitempty"`
}
13 |
--------------------------------------------------------------------------------
/src/search/result/r_web_scraped.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/hearchco/agent/src/utils/moreurls"
5 | "github.com/rs/zerolog/log"
6 | )
7 |
// WebScraped is a scraped web result: URL, title, description and the rank
// it was given by the engine that returned it.
type WebScraped struct {
	url         string
	title       string
	description string
	rank        RankScraped
}
14 |
// Key returns the result's key, which is its URL.
func (r WebScraped) Key() string {
	return r.URL()
}
18 |
19 | func (r WebScraped) URL() string {
20 | if r.url == "" {
21 | log.Panic().Msg("url is empty")
22 | // ^PANIC - Assert because the url should never be empty.
23 | }
24 |
25 | return r.url
26 | }
27 |
28 | func (r WebScraped) Title() string {
29 | if r.title == "" {
30 | log.Panic().Msg("title is empty")
31 | // ^PANIC - Assert because the title should never be empty.
32 | }
33 |
34 | return r.title
35 | }
36 |
// Description returns the scraped description without any validation.
func (r WebScraped) Description() string {
	return r.description
}
40 |
// Rank returns the scraped rank.
func (r WebScraped) Rank() RankScraped {
	return r.rank
}
44 |
45 | func (r WebScraped) Convert(erCap int) Result {
46 | engineRanks := make([]Rank, 0, erCap)
47 | engineRanks = append(engineRanks, r.Rank().Convert())
48 | return &Web{
49 | webJSON{
50 | URL: r.URL(),
51 | FQDN: moreurls.FQDN(r.URL()),
52 | Title: r.Title(),
53 | Description: r.Description(),
54 | EngineRanks: engineRanks,
55 | },
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/search/result/rank.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// Rank is an engine rank: the embedded RankSimple plus the page and on-page
// position held in the embedded rankJSON.
type Rank struct {
	RankSimple

	rankJSON
}
12 |
// rankJSON carries the JSON-tagged page number and on-page rank.
type rankJSON struct {
	Page       int `json:"page"`
	OnPageRank int `json:"on_page_rank"`
}
17 |
// Page returns the stored page number.
func (r Rank) Page() int {
	return r.rankJSON.Page
}
21 |
22 | func (r *Rank) SetPage(page, onPageRank int) {
23 | r.rankJSON.Page = page
24 | r.rankJSON.OnPageRank = onPageRank
25 | }
26 |
// OnPageRank returns the stored on-page rank.
func (r Rank) OnPageRank() int {
	return r.rankJSON.OnPageRank
}
30 |
// SetOnPageRank stores the on-page rank and leaves the page number untouched.
func (r *Rank) SetOnPageRank(onPageRank int) {
	r.rankJSON.OnPageRank = onPageRank
}
34 |
35 | func (r *Rank) UpgradeIfBetter(newR Rank) {
36 | if r.Page() > newR.Page() {
37 | r.SetPage(newR.Page(), newR.OnPageRank())
38 | } else if r.Page() == newR.Page() && r.OnPageRank() > newR.OnPageRank() {
39 | r.SetOnPageRank(newR.OnPageRank())
40 | }
41 | }
42 |
43 | func NewRank(searchEngine engines.Name, rank, page, onPageRank int) Rank {
44 | return Rank{
45 | RankSimple{
46 | rankSimpleJSON{
47 | SearchEngine: searchEngine,
48 | Rank: rank,
49 | },
50 | },
51 | rankJSON{
52 | page,
53 | onPageRank,
54 | },
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/search/result/rank/filler.go:
--------------------------------------------------------------------------------
1 | package rank
2 |
3 | import (
4 | "sort"
5 |
6 | "github.com/hearchco/agent/src/search/engines"
7 | "github.com/hearchco/agent/src/search/result"
8 | )
9 |
10 | // Calculates Rank value of every EngineRank for each Search Engine individually by using Page and OnPageRank to sort.
11 | func (res Results) fillEngineRankRank() {
12 | seEngineRanks := make([][]*result.Rank, len(engines.NameValues()))
13 |
14 | for _, r := range res {
15 | for i := range r.EngineRanks() {
16 | er := &r.EngineRanks()[i]
17 | seEngineRanks[er.SearchEngine()] = append(seEngineRanks[er.SearchEngine()], er)
18 | }
19 | }
20 |
21 | for _, seer := range seEngineRanks {
22 | sort.Sort(ByPageAndOnPageRank(seer))
23 | for i, er := range seer {
24 | er.SetRank(i + 1)
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src/search/result/rank/interfaces.go:
--------------------------------------------------------------------------------
1 | package rank
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// scoreRanker is anything that exposes a score; it is the constraint of
// ByScore.
type scoreRanker interface {
	Score() float64
}
10 |
// scoreEngineRanker is a scoreRanker that also exposes its per-engine ranks
// of type T; it is the input of calculateScore.
type scoreEngineRanker[T ranker] interface {
	scoreRanker

	EngineRanks() []T
}
16 |
// ranker is a single engine rank: the engine that produced it and the rank
// value.
type ranker interface {
	SearchEngine() engines.Name
	Rank() int
}
21 |
--------------------------------------------------------------------------------
/src/search/result/rank/results.go:
--------------------------------------------------------------------------------
1 | package rank
2 |
3 | import (
4 | "sort"
5 |
6 | "github.com/hearchco/agent/src/search/category"
7 | "github.com/hearchco/agent/src/search/result"
8 | )
9 |
// Results is a slice of results that can be ranked in place with Rank.
type Results []result.Result
11 |
// Rank calculates the Score, sorts by it and then populates the Rank field of every result.
// The steps depend on each other and run in this order: engine ranks are
// filled first, scores are calculated from them, the slice is sorted by
// score, and finally each result's rank is set from its position.
func (r Results) Rank(rconf category.Ranking) {
	// Fill Rank field for every EngineRank.
	r.fillEngineRankRank()

	// Calculate and set scores.
	r.calculateScores(rconf)

	// Sort slice by score.
	sort.Sort(ByScore[result.Result](r))

	// Set correct ranks, by iterating over the sorted slice.
	r.correctRanks()
}
26 |
27 | func (r Results) correctRanks() {
28 | for i, res := range r {
29 | res.SetRank(i + 1)
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/search/result/rank/score.go:
--------------------------------------------------------------------------------
1 | package rank
2 |
3 | import (
4 | "math"
5 |
6 | "github.com/hearchco/agent/src/search/category"
7 | )
8 |
9 | // Calculates and sets scores for all results.
10 | func (r Results) calculateScores(rconf category.Ranking) {
11 | for _, res := range r {
12 | res.SetScore(calculateScore(res, rconf))
13 | }
14 | }
15 |
16 | // Calculates and sets scores for all results.
17 | func (s Suggestions) calculateScores(rconf category.Ranking) {
18 | for i := range s {
19 | sug := &s[i]
20 | sug.SetScore(calculateScore(sug, rconf))
21 | }
22 | }
23 |
24 | // Calculates the score for one result.
25 | func calculateScore[T ranker](val scoreEngineRanker[T], rconf category.Ranking) float64 {
26 | var rankScoreSum float64 = 0
27 |
28 | // Calculate the sum of the rank scores of all engines.
29 | // The rank score is dividing 100 to invert the priority (the lower the rank, the higher the score).
30 | for _, er := range val.EngineRanks() {
31 | eng := rconf.Engines[er.SearchEngine()]
32 | rankScoreSum += (100.0/math.Pow(float64(er.Rank())*rconf.RankMul+rconf.RankAdd, rconf.RankExp)*rconf.RankScoreMul+rconf.RankScoreAdd)*eng.Mul + eng.Add
33 | }
34 |
35 | // Calculate the average rank score from the sum.
36 | rankScoreAvg := rankScoreSum / float64(len(val.EngineRanks()))
37 |
38 | // Calculate a second score based on the number of times the result was returned.
39 | // Log is used to make the score less sensitive to the number of times returned.
40 | timesReturnedScore := math.Log(float64(len(val.EngineRanks()))*rconf.TimesReturnedMul+rconf.TimesReturnedAdd)*100*rconf.TimesReturnedScoreMul + rconf.TimesReturnedScoreAdd
41 |
42 | return rankScoreAvg + timesReturnedScore
43 | }
44 |
--------------------------------------------------------------------------------
/src/search/result/rank/sort.go:
--------------------------------------------------------------------------------
1 | package rank
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/rs/zerolog/log"
7 |
8 | "github.com/hearchco/agent/src/search/result"
9 | )
10 |
// ByScore provides the Len/Swap/Less methods used by sort.Sort and orders
// its elements by descending Score.
type ByScore[T scoreRanker] []T

// Len returns the number of elements.
func (r ByScore[T]) Len() int { return len(r) }

// Swap exchanges the elements at i and j.
func (r ByScore[T]) Swap(i, j int) { r[i], r[j] = r[j], r[i] }

// Less reports whether element i has a higher score than element j, so the
// highest score sorts first.
func (r ByScore[T]) Less(i, j int) bool { return r[i].Score() > r[j].Score() }
16 |
17 | type ByPageAndOnPageRank []*result.Rank
18 |
19 | func (r ByPageAndOnPageRank) Len() int { return len(r) }
20 | func (r ByPageAndOnPageRank) Swap(i, j int) { r[i], r[j] = r[j], r[i] }
21 | func (r ByPageAndOnPageRank) Less(i, j int) bool {
22 | if r[i].Page() != r[j].Page() {
23 | return r[i].Page() < r[j].Page()
24 | }
25 |
26 | if r[i].OnPageRank() != r[j].OnPageRank() {
27 | return r[i].OnPageRank() < r[j].OnPageRank()
28 | }
29 |
30 | log.Panic().
31 | Caller().
32 | Str("comparableA", fmt.Sprintf("%v", r[i])).
33 | Str("comparableB", fmt.Sprintf("%v", r[j])).
34 | Msg("Failed at ranking: same page and onpagerank")
35 | // ^PANIC
36 |
37 | panic("Failed at ranking: same page and onpagerank")
38 | }
39 |
--------------------------------------------------------------------------------
/src/search/result/rank/structs_test.go:
--------------------------------------------------------------------------------
1 | package rank
2 |
// testPair couples an input set of results with the results expected after
// processing, for use in table-driven tests.
type testPair struct {
	orig     Results
	expected Results
}
7 |
--------------------------------------------------------------------------------
/src/search/result/rank/suggestions.go:
--------------------------------------------------------------------------------
1 | package rank
2 |
3 | import (
4 | "sort"
5 |
6 | "github.com/hearchco/agent/src/search/category"
7 | "github.com/hearchco/agent/src/search/result"
8 | )
9 |
// Suggestions is a slice of suggestions that can be scored and ranked.
type Suggestions []result.Suggestion

// Rank calculates the score of every suggestion, sorts the slice by score
// (highest first) and then sets the rank of every suggestion to its 1-based
// position in the sorted slice.
func (s Suggestions) Rank(rconf category.Ranking) {
	// Calculate and set scores.
	s.calculateScores(rconf)

	// Sort slice by score.
	sort.Sort(ByScore[result.Suggestion](s))

	// Set correct ranks, by iterating over the sorted slice.
	s.correctRanks()
}
23 |
24 | func (s Suggestions) correctRanks() {
25 | for i := range s {
26 | sug := &s[i]
27 | sug.SetRank(i + 1)
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/search/result/rank_scraped.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// RankScraped is the rank of a scraped result within one search engine,
// extended with the page it was found on and its position on that page.
type RankScraped struct {
	RankSimpleScraped

	page       int
	onPageRank int
}

// Page returns the page the result was found on.
func (r RankScraped) Page() int {
	return r.page
}

// OnPageRank returns the rank of the result on its page.
func (r RankScraped) OnPageRank() int {
	return r.onPageRank
}
21 |
22 | func (r RankScraped) Convert() Rank {
23 | rankSimple := r.RankSimpleScraped.Convert()
24 | return Rank{
25 | rankSimple,
26 | rankJSON{
27 | r.page,
28 | r.onPageRank,
29 | },
30 | }
31 | }
32 |
33 | func NewRankScraped(searchEngine engines.Name, rank, page, onPageRank int) RankScraped {
34 | rankSimpleScraped := NewRankSimpleScraped(searchEngine, rank)
35 | return RankScraped{
36 | rankSimpleScraped,
37 | page,
38 | onPageRank,
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/search/result/ranksimple.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// RankSimple is the rank a value received from a single search engine.
type RankSimple struct {
	rankSimpleJSON
}

// rankSimpleJSON holds the fields of RankSimple together with their JSON tags.
type rankSimpleJSON struct {
	SearchEngine engines.Name `json:"search_engine"`
	Rank         int          `json:"rank"`
}

// SearchEngine returns the name of the search engine this rank belongs to.
func (r RankSimple) SearchEngine() engines.Name {
	return r.rankSimpleJSON.SearchEngine
}

// Rank returns the rank.
func (r RankSimple) Rank() int {
	return r.rankSimpleJSON.Rank
}

// SetRank overwrites the rank.
func (r *RankSimple) SetRank(rank int) {
	r.rankSimpleJSON.Rank = rank
}
27 |
28 | func (r *RankSimple) UpgradeIfBetter(newR RankSimple) {
29 | if r.Rank() > newR.Rank() {
30 | r.SetRank(newR.Rank())
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/search/result/ranksimple_scraped.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "github.com/hearchco/agent/src/search/engines"
5 | )
6 |
// RankSimpleScraped is the rank a scraped value received from a single search
// engine. Its fields are unexported; use Convert to obtain a RankSimple.
type RankSimpleScraped struct {
	searchEngine engines.Name
	rank         int
}

// SearchEngine returns the name of the search engine this rank belongs to.
func (r RankSimpleScraped) SearchEngine() engines.Name {
	return r.searchEngine
}

// Rank returns the rank.
func (r RankSimpleScraped) Rank() int {
	return r.rank
}
19 |
20 | func (r RankSimpleScraped) Convert() RankSimple {
21 | return RankSimple{
22 | rankSimpleJSON{
23 | r.searchEngine,
24 | r.rank,
25 | },
26 | }
27 | }
28 |
29 | func NewRankSimpleScraped(searchEngine engines.Name, rank int) RankSimpleScraped {
30 | return RankSimpleScraped{
31 | searchEngine,
32 | rank,
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/search/result/result_map.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "slices"
5 | "sync"
6 |
7 | "github.com/hearchco/agent/src/search/engines"
8 | "github.com/rs/zerolog/log"
9 | )
10 |
// ResultConcMap is a mutex-guarded map of results keyed by the result key.
type ResultConcMap struct {
	// enabledEnginesLen is passed to Convert when a new result is stored and
	// used as the initial capacity of the responders slice.
	enabledEnginesLen int
	// titleLen and descLen are the limits passed to Shorten on extraction.
	titleLen, descLen int
	mutex             sync.RWMutex
	mapp              map[string]Result
}
17 |
18 | func NewResultMap(enabledEnginesLen, titleLen, descLen int) ResultConcMap {
19 | return ResultConcMap{
20 | enabledEnginesLen: enabledEnginesLen,
21 | titleLen: titleLen,
22 | descLen: descLen,
23 | mutex: sync.RWMutex{},
24 | mapp: make(map[string]Result),
25 | }
26 | }
27 |
28 | // Passed as pointer because of the mutex.
29 | func (m *ResultConcMap) AddOrUpgrade(val ResultScraped) {
30 | if val.Rank().SearchEngine().String() == "" || val.Rank().SearchEngine() == engines.UNDEFINED {
31 | log.Panic().
32 | Str("engine", val.Rank().SearchEngine().String()).
33 | Msg("Received a result with an undefined search engine")
34 | // ^PANIC - Assert because it should never happen.
35 | }
36 |
37 | // Lock the map due to modifications.
38 | m.mutex.Lock()
39 | defer m.mutex.Unlock()
40 |
41 | mapVal, exists := m.mapp[val.Key()]
42 | if !exists {
43 | // Add the result to the map.
44 | m.mapp[val.Key()] = val.Convert(m.enabledEnginesLen)
45 | } else {
46 | var alreadyIn *Rank
47 |
48 | // Check if the engine rank is already in the result.
49 | for i, er := range mapVal.EngineRanks() {
50 | if val.Rank().SearchEngine() == er.SearchEngine() {
51 | alreadyIn = &mapVal.EngineRanks()[i]
52 | break
53 | }
54 | }
55 |
56 | // Update the result if the new rank is better.
57 | if alreadyIn == nil {
58 | mapVal.AppendEngineRanks(val.Rank().Convert())
59 | } else {
60 | alreadyIn.UpgradeIfBetter(val.Rank().Convert())
61 | }
62 |
63 | // Update the description if the new description is longer.
64 | if len(mapVal.Description()) < len(val.Description()) {
65 | mapVal.SetDescription(val.Description())
66 | }
67 | }
68 | }
69 |
70 | // Passed as pointer because of the mutex.
71 | func (m *ResultConcMap) ExtractWithResponders() ([]Result, []engines.Name) {
72 | m.mutex.RLock()
73 | defer m.mutex.RUnlock()
74 |
75 | results := make([]Result, 0, len(m.mapp))
76 | responders := make([]engines.Name, 0, m.enabledEnginesLen)
77 |
78 | for _, res := range m.mapp {
79 | newRes := res.Shorten(m.titleLen, m.descLen)
80 | newRes.ShrinkEngineRanks()
81 | results = append(results, newRes)
82 | for _, rank := range res.EngineRanks() {
83 | if !slices.Contains(responders, rank.SearchEngine()) {
84 | responders = append(responders, rank.SearchEngine())
85 | }
86 | }
87 | }
88 |
89 | return results, responders
90 | }
91 |
--------------------------------------------------------------------------------
/src/search/result/shorten.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | // Changes the title and description of the result to be at most N and M characters long respectively.
4 | func (r Web) Shorten(maxTitleLength int, maxDescriptionLength int) Result {
5 | short := r
6 | short.webJSON.Title = shortString(r.Title(), maxTitleLength)
7 | short.webJSON.Description = shortString(r.Description(), maxDescriptionLength)
8 | return &short
9 | }
10 |
11 | func (r Images) Shorten(maxTitleLength int, maxDescriptionLength int) Result {
12 | short := r
13 | short.webJSON.Title = shortString(r.Title(), maxTitleLength)
14 | short.webJSON.Description = shortString(r.Description(), maxDescriptionLength)
15 | return &short
16 | }
17 |
// shortString returns s limited to at most n characters (runes).
//
// A negative n disables the limit. When s has to be cut and n leaves room for
// it, the result ends in "..." (counted towards the n characters); for n <= 3
// the string is cut without a suffix.
//
// The limit is measured in runes, not bytes, so multi-byte strings that
// already fit are returned unchanged.
func shortString(s string, n int) string {
	if n < 0 {
		return s
	}

	// Fast path: the rune count never exceeds the byte count, so a string
	// with at most n bytes always fits.
	if len(s) <= n {
		return s
	}

	// Measure in runes so the check agrees with the rune-based truncation.
	if len([]rune(s)) <= n {
		return s
	}

	suffix := "..."
	if n-len(suffix) <= 0 {
		suffix = "" // No room for suffix.
	}

	return firstNchars(s, n-len(suffix)) + suffix
}

// firstNchars returns the first n runes of str. The whole string is returned
// when n is negative or not smaller than the number of runes in str.
func firstNchars(str string, n int) string {
	v := []rune(str)
	if n < 0 || n >= len(v) {
		return str
	}
	return string(v[:n])
}
43 |
--------------------------------------------------------------------------------
/src/search/result/suggestions_map.go:
--------------------------------------------------------------------------------
1 | package result
2 |
3 | import (
4 | "slices"
5 | "sync"
6 |
7 | "github.com/hearchco/agent/src/search/engines"
8 | "github.com/rs/zerolog/log"
9 | )
10 |
// SuggestionConcMap is a mutex-guarded map of suggestions keyed by the
// suggestion key.
type SuggestionConcMap struct {
	// enabledEnginesLen is passed to Convert when a new suggestion is stored
	// and used as the initial capacity of the responders slice.
	enabledEnginesLen int
	mutex             sync.RWMutex
	mapp              map[string]Suggestion
}
16 |
17 | func NewSuggestionMap(enabledEnginesLen int) SuggestionConcMap {
18 | return SuggestionConcMap{
19 | enabledEnginesLen: enabledEnginesLen,
20 | mutex: sync.RWMutex{},
21 | mapp: make(map[string]Suggestion),
22 | }
23 | }
24 |
25 | func (m *SuggestionConcMap) AddOrUpgrade(val SuggestionScraped) {
26 | if val.Rank().SearchEngine().String() == "" || val.Rank().SearchEngine() == engines.UNDEFINED {
27 | log.Panic().
28 | Str("engine", val.Rank().SearchEngine().String()).
29 | Msg("Received a suggestion with an undefined search engine")
30 | // ^PANIC - Assert because it should never happen.
31 | }
32 |
33 | // Lock the map due to modifications.
34 | m.mutex.Lock()
35 | defer m.mutex.Unlock()
36 |
37 | mapVal, exists := m.mapp[val.Key()]
38 | if !exists {
39 | // Add the result to the map.
40 | m.mapp[val.Key()] = val.Convert(m.enabledEnginesLen)
41 | } else {
42 | var alreadyIn *RankSimple
43 |
44 | // Check if the engine rank is already in the result.
45 | for i, er := range mapVal.EngineRanks() {
46 | if val.Rank().SearchEngine() == er.SearchEngine() {
47 | alreadyIn = &mapVal.EngineRanks()[i]
48 | break
49 | }
50 | }
51 |
52 | // Update the result if the new rank is better.
53 | if alreadyIn == nil {
54 | mapVal.AppendEngineRanks(val.Rank().Convert())
55 | } else {
56 | alreadyIn.UpgradeIfBetter(val.Rank().Convert())
57 | }
58 | }
59 | }
60 |
61 | func (m *SuggestionConcMap) ExtractWithResponders() ([]Suggestion, []engines.Name) {
62 | m.mutex.RLock()
63 | defer m.mutex.RUnlock()
64 |
65 | suggestions := make([]Suggestion, 0, len(m.mapp))
66 | responders := make([]engines.Name, 0, m.enabledEnginesLen)
67 |
68 | for _, sug := range m.mapp {
69 | sug.ShrinkEngineRanks()
70 | suggestions = append(suggestions, sug)
71 | for _, rank := range sug.EngineRanks() {
72 | if !slices.Contains(responders, rank.SearchEngine()) {
73 | responders = append(responders, rank.SearchEngine())
74 | }
75 | }
76 | }
77 |
78 | return suggestions, responders
79 | }
80 |
--------------------------------------------------------------------------------
/src/search/run_engine.go:
--------------------------------------------------------------------------------
1 | package search
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/rs/zerolog/log"
7 |
8 | "github.com/hearchco/agent/src/search/engines"
9 | "github.com/hearchco/agent/src/search/engines/options"
10 | "github.com/hearchco/agent/src/search/result"
11 | "github.com/hearchco/agent/src/utils/anonymize"
12 | )
13 |
// Runner is the function an engine exposes to perform a search: it receives
// the query, the options and the channel to send scraped values to, and
// returns the errors encountered and whether anything was scraped.
type Runner[T any] func(string, options.Options, chan T) ([]error, bool)

// runEngine runs the given engine runner at most once per onceWrap, feeding
// the values it produces into concMap through a receiver goroutine, and
// records on onceWrap whether the run errored and/or scraped anything.
// It returns only after the receiver goroutine has finished.
func runEngine[T any](groupName string, onceWrap *onceWrapper, concMap result.ConcMapper[T], engName engines.Name, runner Runner[T], query string, opts options.Options) {
	// Run the engine only once.
	onceWrap.Do(func() {
		// Create a buffered channel for the results.
		resChan := make(chan T, 100)

		// Start the receiver for the engine.
		// NOTE(review): the receiver presumably drains resChan into concMap and
		// finishes once the channel is closed by the runner — confirm against
		// createReceiver and the engine implementations.
		var receiver sync.WaitGroup
		receiver.Add(1)
		go createReceiver(&receiver, resChan, concMap)

		log.Trace().
			Str("engine", engName.String()).
			Str("query", anonymize.String(query)).
			Str("group", groupName).
			Msg("Started")

		// Run the engine.
		errs, scraped := runner(query, opts, resChan)

		// Any error marks the run as errored, independently of whether
		// something was scraped.
		if len(errs) > 0 {
			onceWrap.Errored()
			log.Error().
				Errs("errors", errs).
				Str("engine", engName.String()).
				Str("query", anonymize.String(query)).
				Str("group", groupName).
				Msg("Error searching")
		}

		if !scraped {
			log.Debug().
				Str("engine", engName.String()).
				Str("query", anonymize.String(query)).
				Str("group", groupName).
				Msg("Failed to scrape any results (probably timed out)")
		} else {
			onceWrap.Scraped()
		}

		// Wait for the receiver to finish.
		receiver.Wait()
	})
}
60 |
--------------------------------------------------------------------------------
/src/search/scraper/dompaths.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
// DOMPaths holds the selectors used to locate a result and its fields in an
// engine's HTML response.
type DOMPaths struct {
	ResultsContainer string
	Result           string
	URL              string
	Title            string
	Description      string
}

// DOMPathsImages extends DOMPaths with the selectors needed for image results.
type DOMPathsImages struct {
	DOMPaths

	// OriginalSize holds the selectors for the dimensions of the original image.
	OriginalSize struct {
		Height string
		Width  string
	}
	// ThumbnailSize holds the selectors for the dimensions of the thumbnail.
	ThumbnailSize struct {
		Height string
		Width  string
	}
	ThumbnailURL string
	SourceName   string
	SourceURL    string
}
26 |
--------------------------------------------------------------------------------
/src/search/scraper/enginebase.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/gocolly/colly/v2"
7 |
8 | "github.com/hearchco/agent/src/search/engines"
9 | )
10 |
// EngineBase is the base struct for every search engine. It carries the
// engine's name, its origins and the colly collector used for requests.
type EngineBase struct {
	Name      engines.Name
	Origins   []engines.Name
	collector *colly.Collector
}

// GetName returns the name of the search engine.
func (e EngineBase) GetName() engines.Name {
	return e.Name
}

// GetOrigins returns the origins of the search engine.
func (e EngineBase) GetOrigins() []engines.Name {
	return e.Origins
}
27 |
// Init registers the request, response and error handling on the EngineBase
// collector.
// NOTE(review): the collector is not created here; it appears to be set up by
// InitSearcher/InitSuggester before Init is called — confirm.
func (e *EngineBase) Init(ctx context.Context) {
	e.initCollectorOnRequest(ctx)
	e.initCollectorOnResponse()
	e.initCollectorOnError()
}

// InitSearcher initializes the EngineBase collector for searching web/images
// and then runs Init.
func (e *EngineBase) InitSearcher(ctx context.Context) {
	e.initCollectorSearcher(ctx)
	e.Init(ctx)
}

// InitSuggester initializes the EngineBase collector for searching suggestions
// and then runs Init.
func (e *EngineBase) InitSuggester(ctx context.Context) {
	e.initCollectorSuggester(ctx)
	e.Init(ctx)
}
46 |
--------------------------------------------------------------------------------
/src/search/scraper/interfaces.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/hearchco/agent/src/search/engines"
7 | "github.com/hearchco/agent/src/search/engines/options"
8 | "github.com/hearchco/agent/src/search/result"
9 | )
10 |
// Enginer is the interface every search engine must implement.
type Enginer interface {
	GetName() engines.Name
	GetOrigins() []engines.Name
	Init(context.Context)
}

// WebSearcher is the interface a search engine must implement to support
// searching web results.
type WebSearcher interface {
	Enginer

	InitSearcher(context.Context)
	// WebSearch takes the query, the options and the channel to send scraped
	// results to; it returns the errors encountered and a bool reporting
	// whether any result was scraped.
	WebSearch(string, options.Options, chan result.ResultScraped) ([]error, bool)
}

// ImageSearcher is the interface a search engine must implement to support
// searching image results.
type ImageSearcher interface {
	Enginer

	InitSearcher(context.Context)
	// ImageSearch has the same contract as WebSearcher.WebSearch, for images.
	ImageSearch(string, options.Options, chan result.ResultScraped) ([]error, bool)
}

// Suggester is the interface a search engine must implement to support
// suggesting.
type Suggester interface {
	Enginer

	InitSuggester(context.Context)
	// Suggest takes the query, the options and the channel to send scraped
	// suggestions to; it returns the errors encountered and a bool reporting
	// whether any suggestion was scraped.
	Suggest(string, options.Options, chan result.SuggestionScraped) ([]error, bool)
}
41 |
--------------------------------------------------------------------------------
/src/search/scraper/pagecontext.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
3 | import (
4 | "strconv"
5 |
6 | "github.com/gocolly/colly/v2"
7 | "github.com/rs/zerolog/log"
8 | )
9 |
10 | func (e EngineBase) PageFromContext(ctx *colly.Context) int {
11 | var pageStr string = ctx.Get("page")
12 | page, err := strconv.Atoi(pageStr)
13 | if err != nil {
14 | log.Panic().
15 | Caller().
16 | Err(err).
17 | Str("engine", e.Name.String()).
18 | Str("page", pageStr).
19 | Msg("Failed to convert page number to int")
20 | // ^PANIC
21 | }
22 | return page
23 | }
24 |
--------------------------------------------------------------------------------
/src/search/scraper/pagerankcounter.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
3 | import (
4 | "sync/atomic"
5 | )
6 |
// PageRankCounter keeps one atomic counter per page so that on-page ranks can
// be counted from multiple goroutines.
type PageRankCounter struct {
	counts []atomic.Int32
}

// NewPageRankCounter creates a PageRankCounter with a zeroed counter for each
// of the given number of pages.
func NewPageRankCounter(pages int) PageRankCounter {
	perPage := make([]atomic.Int32, pages)
	return PageRankCounter{counts: perPage}
}

// Increment adds one to the counter of the given page (0-based index).
func (prc *PageRankCounter) Increment(page int) {
	counter := &prc.counts[page]
	counter.Add(1)
}

// GetPlusOne returns the current count of the given page plus one.
func (prc *PageRankCounter) GetPlusOne(page int) int {
	next := prc.counts[page].Load() + 1
	return int(next)
}
26 |
--------------------------------------------------------------------------------
/src/search/scraper/parse/fields.go:
--------------------------------------------------------------------------------
1 | package parse
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/PuerkitoBio/goquery"
7 | "github.com/rs/zerolog/log"
8 |
9 | "github.com/hearchco/agent/src/search/engines"
10 | "github.com/hearchco/agent/src/search/scraper"
11 | )
12 |
13 | // Fetches from DOM via dompaths. Returns url, title and description.
14 | func RawFieldsFromDOM(dom *goquery.Selection, dompaths scraper.DOMPaths, seName engines.Name) (string, string, string) {
15 | descText := dom.Find(dompaths.Description).Text()
16 | titleDom := dom.Find(dompaths.Title)
17 | titleText := titleDom.Text()
18 |
19 | // Title and URL selector are often the same.
20 | var linkDom *goquery.Selection
21 | if dompaths.URL == dompaths.Result {
22 | linkDom = titleDom
23 | } else {
24 | linkDom = dom.Find(dompaths.URL)
25 | }
26 |
27 | linkText, hrefExists := linkDom.Attr("href")
28 | if !hrefExists {
29 | log.Error().
30 | Caller().
31 | Str("engine", seName.String()).
32 | Str("url", linkText).
33 | Str("title", titleText).
34 | Str("description", descText).
35 | Msgf("Href attribute doesn't exist on matched URL element (%v)", dompaths.URL)
36 |
37 | return "", "", ""
38 | }
39 |
40 | return linkText, titleText, descText
41 | }
42 |
43 | // Fetches from DOM via dompaths and sanitizes. Returns url, title and description.
44 | func FieldsFromDOM(dom *goquery.Selection, dompaths scraper.DOMPaths, seName engines.Name) (string, string, string) {
45 | return SanitizeFields(RawFieldsFromDOM(dom, dompaths, seName))
46 | }
47 |
48 | func SanitizeURL(urlText string) string {
49 | return ParseURL(urlText)
50 | }
51 |
52 | func SanitizeTitle(titleText string) string {
53 | return ParseTextWithHTML(strings.TrimSpace(titleText))
54 | }
55 |
56 | func SanitizeDescription(descText string) string {
57 | return ParseTextWithHTML(strings.TrimSpace(descText))
58 | }
59 |
60 | func SanitizeFields(linkText string, titleText string, descText string) (string, string, string) {
61 | return SanitizeURL(linkText), SanitizeTitle(titleText), SanitizeDescription(descText)
62 | }
63 |
--------------------------------------------------------------------------------
/src/search/scraper/parse/parse.go:
--------------------------------------------------------------------------------
1 | package parse
2 |
3 | import (
4 | "fmt"
5 | "net/url"
6 | "strings"
7 |
8 | "github.com/PuerkitoBio/goquery"
9 | "github.com/rs/zerolog/log"
10 | "golang.org/x/net/html"
11 | )
12 |
13 | func ParseURL(rawURL string) string {
14 | urll, err := parseURL(rawURL)
15 | if err != nil {
16 | log.Error().
17 | Caller().
18 | Err(err).
19 | Str("url", urll).
20 | Msg("Couldn't parse url")
21 | return rawURL
22 | }
23 | return urll
24 | }
25 |
// parseURL trims surrounding whitespace from rawURL, parses it and returns its
// string form with a single trailing slash (if any) removed.
// It returns an empty string and a wrapped error when the URL cannot be parsed.
func parseURL(rawURL string) (string, error) {
	parsed, err := url.Parse(strings.TrimSpace(rawURL))
	if err != nil {
		return "", fmt.Errorf("parse.parseURL(): failed url.Parse() on url(%v). error: %w", rawURL, err)
	}

	// TrimSuffix removes at most one trailing slash.
	return strings.TrimSuffix(parsed.String(), "/"), nil
}
40 |
41 | func ParseTextWithHTML(rawHTML string) string {
42 | text, err := parseTextWithHTML(rawHTML)
43 | if err != nil {
44 | log.Error().
45 | Caller().
46 | Err(err).
47 | Str("html", rawHTML).
48 | Msg("Failed parsing text with html")
49 | return rawHTML
50 | }
51 | return text
52 | }
53 |
54 | func parseTextWithHTML(rawHTML string) (string, error) {
55 | var result string = ""
56 |
57 | htmlNode, err := html.ParseFragment(strings.NewReader(rawHTML), nil)
58 | if err != nil {
59 | return "", fmt.Errorf("Failed html.ParseFragment on %v: %w", rawHTML, err)
60 | }
61 |
62 | for _, el := range htmlNode {
63 | sel := goquery.NewDocumentFromNode(el)
64 | result += sel.Text()
65 | }
66 |
67 | return result, nil
68 | }
69 |
--------------------------------------------------------------------------------
/src/search/scraper/requests.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "net/http"
7 |
8 | "github.com/gocolly/colly/v2"
9 | "github.com/rs/zerolog/log"
10 | )
11 |
12 | func (e EngineBase) Get(ctx *colly.Context, urll string, anonUrll string) error {
13 | log.Trace().
14 | Str("engine", e.Name.String()).
15 | Str("url", anonUrll).
16 | Str("method", http.MethodGet).
17 | Msg("Making a new request")
18 |
19 | if err := e.collector.Request(http.MethodGet, urll, nil, ctx, nil); err != nil {
20 | return fmt.Errorf("%v: failed GET request to %v with %w", e.Name.String(), anonUrll, err)
21 | }
22 |
23 | return nil
24 | }
25 |
26 | func (e EngineBase) Post(ctx *colly.Context, urll string, body io.Reader, anonBody string) error {
27 | log.Trace().
28 | Str("engine", e.Name.String()).
29 | Str("url", urll).
30 | Str("body", anonBody).
31 | Str("method", http.MethodPost).
32 | Msg("Making a new request")
33 |
34 | if err := e.collector.Request(http.MethodPost, urll, body, ctx, nil); err != nil {
35 | return fmt.Errorf("%v: failed POST request to %v with %w", e.Name.String(), urll, err)
36 | }
37 |
38 | return nil
39 | }
40 |
--------------------------------------------------------------------------------
/src/search/scraper/scrape.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
3 | import (
4 | "github.com/gocolly/colly/v2"
5 | )
6 |
// OnHTML registers a function on the engine's collector. The function will be
// executed on every HTML element matched by the GoQuery Selector parameter.
// GoQuery Selector is a selector used by https://github.com/PuerkitoBio/goquery.
func (e *EngineBase) OnHTML(goquerySelector string, f colly.HTMLCallback) {
	e.collector.OnHTML(goquerySelector, f)
}

// OnResponse registers a function on the engine's collector. The function will
// be executed on every response.
func (e *EngineBase) OnResponse(f colly.ResponseCallback) {
	e.collector.OnResponse(f)
}

// OnRequest registers a function on the engine's collector. The function will
// be executed on every request made by the Collector.
func (e *EngineBase) OnRequest(f colly.RequestCallback) {
	e.collector.OnRequest(f)
}

// Wait returns when the collector jobs are finished.
func (e EngineBase) Wait() {
	e.collector.Wait()
}
29 |
--------------------------------------------------------------------------------
/src/search/scraper/suggest_resp.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | )
7 |
// SuggestRespToSuggestions converts the JSON body of an opensearch.xml
// compatible suggestions API into a slice of suggestions.
//
// The body must be a JSON array with at least two elements whose second
// element is a non-empty array of strings; anything else yields an error.
func SuggestRespToSuggestions(data []byte) ([]string, error) {
	// The response is a heterogeneous array, so decode it loosely.
	var payload []any
	if err := json.Unmarshal(data, &payload); err != nil {
		return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
	}

	// The suggestions live in the second element.
	if len(payload) < 2 {
		return nil, fmt.Errorf("unexpected JSON structure")
	}

	rawItems, isSlice := payload[1].([]any)
	switch {
	case !isSlice:
		return nil, fmt.Errorf("unexpected type for second element")
	case len(rawItems) == 0:
		// No suggestions is reported as an error.
		return nil, fmt.Errorf("empty suggestions")
	}

	// Every item must be a string.
	suggestions := make([]string, len(rawItems))
	for i, raw := range rawItems {
		text, isString := raw.(string)
		if !isString {
			return nil, fmt.Errorf("unexpected type in string slice")
		}
		suggestions[i] = text
	}

	return suggestions, nil
}
47 |
--------------------------------------------------------------------------------
/src/search/scraper/timeout.go:
--------------------------------------------------------------------------------
1 | package scraper
2 |
import (
	"context"
	"errors"
	"net"
	"strings"
)
8 |
// IsTimeoutError reports whether err represents a cancelled context or a
// timeout.
//
// A nil error is never a timeout. Cancellation is detected both through the
// error chain and, for errors that only carry the text, by the error message
// ending in the context.Canceled message. Timeouts are detected on the first
// error in the chain that implements net.Error.
func IsTimeoutError(err error) bool {
	if err == nil {
		return false
	}

	// Check if the error is a cancelled context error. The suffix check is
	// kept for errors that were flattened to text and lost their chain.
	if errors.Is(err, context.Canceled) || strings.HasSuffix(err.Error(), context.Canceled.Error()) {
		return true
	}

	// Check if the error (or an error it wraps) is a timeout error.
	var netErr net.Error
	return errors.As(err, &netErr) && netErr.Timeout()
}
22 |
--------------------------------------------------------------------------------
/src/search/searchtype/name.go:
--------------------------------------------------------------------------------
1 | package searchtype
2 |
3 | import (
4 | "fmt"
5 | )
6 |
// Name identifies a search type.
type Name string

// The search types that are defined.
const (
	WEB         Name = "web"
	IMAGES      Name = "images"
	SUGGESTIONS Name = "suggestions"
)

// String returns the search type name as a plain string.
func (st Name) String() string {
	return string(st)
}

// FromString converts a string to the search type name with exactly that
// value. It returns an empty name and an error when no such search type is
// defined.
func FromString(st string) (Name, error) {
	for _, known := range [...]Name{WEB, IMAGES, SUGGESTIONS} {
		if st == known.String() {
			return known, nil
		}
	}
	return "", fmt.Errorf("search type %q is not defined", st)
}
33 |
--------------------------------------------------------------------------------
/src/search/useragent/useragent.go:
--------------------------------------------------------------------------------
1 | package useragent
2 |
3 | import (
4 | "fmt"
5 | "math/rand"
6 | "slices"
7 | "time"
8 |
9 | "github.com/rs/zerolog/log"
10 | )
11 |
// browsers lists the browser names a user agent can be generated for.
var browsers = [...]string{"chrome", "edge"}

// versions lists the browser major versions a user agent can be generated for.
var versions = [...]int{127, 128}

// userAgentWithHeaders bundles a User-Agent string with the values of the
// matching Sec-CH-UA client hint headers.
type userAgentWithHeaders struct {
	UserAgent       string
	SecCHUA         string
	SecCHUAMobile   string
	SecCHUAPlatform string
}
21 |
22 | func userAgentStruct(browser string, version int) userAgentWithHeaders {
23 | if !slices.Contains(browsers[:], browser) {
24 | log.Panic().
25 | Str("browser", browser).
26 | Msg("Invalid browser")
27 | // ^PANIC - This should never happen
28 | }
29 | if !slices.Contains(versions[:], version) {
30 | log.Panic().
31 | Int("version", version).
32 | Msg("Invalid version")
33 | // ^PANIC - This should never happen
34 | }
35 |
36 | const userAgentTemplate = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%d.0.0.0 Safari/537.36"
37 | userAgent := fmt.Sprintf(userAgentTemplate, version)
38 | if browser == "edge" {
39 | userAgent = fmt.Sprintf("%s Edg/%d.0.0.0", userAgent, version)
40 | }
41 |
42 | const secCHUATemplate = `"Chromium";v="%d", "Not;A=Brand";v="24", "%s";v="%d"`
43 | secCHUA := fmt.Sprintf(secCHUATemplate, version, "Google Chrome", version)
44 | if browser == "edge" {
45 | secCHUA = fmt.Sprintf(secCHUATemplate, version, "Microsoft Edge", version)
46 | }
47 |
48 | return userAgentWithHeaders{
49 | userAgent,
50 | secCHUA,
51 | "?0",
52 | `"Windows"`,
53 | }
54 | }
55 |
56 | func randomUserAgentStruct() userAgentWithHeaders {
57 | // WARNING: Will stop working after year 2262.
58 | randSrc := rand.NewSource(time.Now().UnixNano())
59 | randGen := rand.New(randSrc)
60 | return userAgentStruct(browsers[randGen.Intn(len(browsers))], versions[randGen.Intn(len(versions))])
61 | }
62 |
63 | func RandomUserAgent() string {
64 | randomUA := randomUserAgentStruct()
65 | return randomUA.UserAgent
66 | }
67 |
// RandomUserAgentWithHeaders returns the User-Agent string together with the
// matching client hint header values of a randomly chosen supported browser
// and version.
func RandomUserAgentWithHeaders() userAgentWithHeaders {
	return randomUserAgentStruct()
}
71 |
--------------------------------------------------------------------------------
/src/utils/anonymize/hash.go:
--------------------------------------------------------------------------------
1 | package anonymize
2 |
3 | import (
4 | "crypto/hmac"
5 | "crypto/sha256"
6 | "encoding/base64"
7 | "fmt"
8 | "time"
9 |
10 | "github.com/hearchco/agent/src/utils/moretime"
11 | )
12 |
// timestampFormat is the layout used to format and parse the timestamps that
// are included in the HMAC.
const timestampFormat = time.RFC3339
15 |
// CalculateHashBase64 returns the SHA-256 hash of the message, encoded with
// the URL-safe base64 alphabet (with padding).
func CalculateHashBase64(message string) string {
	digest := sha256.Sum256([]byte(message))
	return base64.URLEncoding.EncodeToString(digest[:])
}
24 |
25 | // Returns the hash of the message and the timestamp used to generate it.
26 | func CalculateHMACBase64(message, key string, t time.Time) (string, string) {
27 | hasher := hmac.New(sha256.New, []byte(key))
28 | timestamp := base64.URLEncoding.EncodeToString([]byte(t.Format(timestampFormat)))
29 |
30 | hasher.Write([]byte(timestamp))
31 | hasher.Write([]byte(message))
32 | hashedBinary := hasher.Sum(nil)
33 |
34 | hashedString := base64.URLEncoding.EncodeToString(hashedBinary)
35 | return hashedString, timestamp
36 | }
37 |
38 | // Returns whether the tag is valid for the given message, timestamp and key.
39 | func VerifyHMACBase64(tag, orig, key, timestampB64 string) (bool, error) {
40 | timestamp, err := base64.URLEncoding.DecodeString(timestampB64)
41 | if err != nil {
42 | return false, fmt.Errorf("error decoding timestamp: %v", err)
43 | }
44 |
45 | t, err := time.Parse(timestampFormat, string(timestamp))
46 | if err != nil {
47 | return false, fmt.Errorf("error parsing timestamp: %v", err)
48 | }
49 |
50 | // TODO: Make duration of the timestamp configurable.
51 | if time.Since(t) > moretime.Day {
52 | return false, nil
53 | }
54 |
55 | verificator, _ := CalculateHMACBase64(orig, key, t)
56 | return tag == verificator, nil
57 | }
58 |
--------------------------------------------------------------------------------
/src/utils/anonymize/hash_test.go:
--------------------------------------------------------------------------------
1 | package anonymize
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | func TestHashToSHA256B64(t *testing.T) {
8 | // original string, expected hash (sha256 returns binary and is encoded to base64)
9 | tests := []testPair{
10 | {"", "47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU="},
11 | {"banana death", "e8kN64XJ4Icr6Tl9VYrBRj50UJCPlyillODm3vVNk2g="},
12 | {"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "LYwvbZeMohcStfbeNsnTH6jpak-l2P-LAYjfuefBcbs="},
13 | {"Ćao hrčko!! 🐹", "_Y3KWzrx2UkeTp8b--48L6OFgv51JWPlZArjoFOrmbw="},
14 | }
15 |
16 | for _, test := range tests {
17 | hash := CalculateHashBase64(test.orig)
18 | if hash != test.expected {
19 | t.Errorf("HashToSHA256B64(%q) = %q, want %q", test.orig, hash, test.expected)
20 | }
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/src/utils/anonymize/string.go:
--------------------------------------------------------------------------------
1 | package anonymize
2 |
3 | import (
4 | "math/rand"
5 | "sort"
6 | "strings"
7 | "time"
8 | )
9 |
10 | // Anonymize string
11 | func String(orig string) string {
12 | return shuffle(deduplicate(orig))
13 | }
14 |
15 | // Anonymize substring of a string
16 | func Substring(orig string, ssToAnon string) string {
17 | return strings.ReplaceAll(orig, ssToAnon, String(ssToAnon))
18 | }
19 |
// deduplicate returns orig with every repeated character removed, keeping the
// first occurrence of each character in its original position.
func deduplicate(orig string) string {
	// A builder avoids the quadratic cost of growing a string with += in a loop.
	var sb strings.Builder
	sb.Grow(len(orig))

	seen := make(map[rune]struct{})
	for _, char := range orig {
		if _, dup := seen[char]; dup {
			continue
		}
		seen[char] = struct{}{}
		sb.WriteRune(char)
	}

	return sb.String()
}
34 |
// shuffle returns the characters of orig in a random order. It exists because
// deduplicate retains the order of letters.
func shuffle(orig string) string {
	runes := []rune(orig)

	// WARNING: In year 2262, this will break.
	seed := time.Now().UnixNano()
	rng := rand.New(rand.NewSource(seed))

	swap := func(a, b int) {
		runes[a], runes[b] = runes[b], runes[a]
	}
	rng.Shuffle(len(runes), swap)

	return string(runes)
}
47 |
// sortString returns orig with its characters sorted lexicographically.
func sortString(orig string) string {
	// Splitting on the empty separator yields one string per character.
	parts := strings.Split(orig, "")
	sort.Sort(sort.StringSlice(parts))
	return strings.Join(parts, "")
}
55 |
--------------------------------------------------------------------------------
/src/utils/anonymize/string_test.go:
--------------------------------------------------------------------------------
1 | package anonymize
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | func TestDeduplicate(t *testing.T) {
8 | // original string, expected deduplicated string
9 | tests := []testPair{
10 | {"", ""},
11 | {"gmail", "gmail"},
12 | {"banana death", "ban deth"},
13 | {"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "Lorem ipsudlta,cngbq.UvxDhfE"},
14 | }
15 |
16 | for _, test := range tests {
17 | deduplicated := deduplicate(test.orig)
18 | if deduplicated != test.expected {
19 | t.Errorf("deduplicate(%q) = %q, want %q", test.orig, deduplicated, test.expected)
20 | }
21 | }
22 | }
23 |
24 | func TestSortString(t *testing.T) {
25 | // original string, sorted string
26 | tests := []testPair{
27 | {"", ""},
28 | {"gmail", "agilm"},
29 | {"banana death", " aaaabdehnnt"},
30 | {
31 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
32 | " ,,.Laaaaaaabccccddddddddeeeeeeeeeeeggiiiiiiiiiiilllllmmmmmmnnnnnoooooooooopppqrrrrrrsssssstttttttttuuuuuu",
33 | },
34 | }
35 |
36 | for _, test := range tests {
37 | sorted := sortString(test.orig)
38 |
39 | if sorted != test.expected {
40 | t.Errorf("SortString(%q) = %q, want %q", test.orig, sorted, test.expected)
41 | }
42 | }
43 | }
44 |
45 | func TestShuffle(t *testing.T) {
46 | // original string, sorted string
47 | tests := []testPair{
48 | {"", ""},
49 | {"gmail", "agilm"},
50 | {"banana death", " aaaabdehnnt"},
51 | {
52 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
53 | " ,,.Laaaaaaabccccddddddddeeeeeeeeeeeggiiiiiiiiiiilllllmmmmmmnnnnnoooooooooopppqrrrrrrsssssstttttttttuuuuuu",
54 | },
55 | }
56 |
57 | for _, test := range tests {
58 | shuffled := shuffle(test.orig)
59 | shuffledSorted := sortString(shuffled)
60 |
61 | if shuffledSorted != test.expected {
62 | t.Errorf("SortString(Shuffle(%q)) = %q, want %q", test.orig, shuffledSorted, test.expected)
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/utils/anonymize/structs_test.go:
--------------------------------------------------------------------------------
1 | package anonymize
2 |
// testPair couples a test input with the value a test compares the result
// against.
type testPair struct {
	orig     string // Input handed to the code under test.
	expected string // Value the result is compared against.
}
7 |
--------------------------------------------------------------------------------
/src/utils/gotypelimits/ints.go:
--------------------------------------------------------------------------------
1 | package gotypelimits
2 |
// const MaxInt8 = int8(MaxUint8 >> 1)
// const MinInt8 = -MaxInt8 - 1
// const MaxInt16 = int16(MaxUint16 >> 1)
// const MinInt16 = -MaxInt16 - 1
// const MaxInt32 = int32(MaxUint32 >> 1)
// const MinInt32 = -MaxInt32 - 1
// const MaxInt64 = int64(MaxUint64 >> 1)
// const MinInt64 = -MaxInt64 - 1

// MaxInt is the largest value an int can hold: MaxUint shifted right by one
// bit, which leaves the sign bit cleared.
const MaxInt = int(MaxUint >> 1)

// const MinInt = -MaxInt - 1
14 |
--------------------------------------------------------------------------------
/src/utils/gotypelimits/uints.go:
--------------------------------------------------------------------------------
1 | package gotypelimits
2 |
// const MaxUint8 = ^uint8(0)
// const MinUint8 = 0
// const MaxUint16 = ^uint16(0)
// const MinUint16 = 0
// const MaxUint32 = ^uint32(0)
// const MinUint32 = 0
// const MaxUint64 = ^uint64(0)
// const MinUint64 = 0

// MaxUint is the largest value a uint can hold (every bit set).
const MaxUint = ^uint(0)

// const MinUint = 0
14 |
--------------------------------------------------------------------------------
/src/utils/kvpair/kvpair.go:
--------------------------------------------------------------------------------
1 | package kvpair
2 |
3 | import (
4 | "net/url"
5 |
6 | "github.com/rs/zerolog/log"
7 | )
8 |
// KVPair is a simple key/value string pair.
// Both fields are unexported.
type KVPair struct {
	key   string // Key of the pair.
	value string // Value of the pair.
}
14 |
15 | // Constructs a new KVPair with provided key and value.
16 | func NewKVPair(k, v string) KVPair {
17 | kv := KVPair{k, v}
18 | kv.assert()
19 | return kv
20 | }
21 |
22 | // Private assert function to ensure key and value are not empty.
23 | // Panics if either key or value are empty.
24 | func (kv KVPair) assert() {
25 | if kv.key == "" || kv.value == "" {
26 | log.Panic().
27 | Str("key", kv.key).
28 | Str("value", kv.value).
29 | Msg("Empty key or value in KVPair")
30 | // ^PANIC - Assert proper values in KVPair.
31 | }
32 | }
33 |
// Key returns the key of the pair.
// It panics if the pair holds an empty key or value.
func (kv KVPair) Key() string {
	kv.assert()
	return kv.key
}
39 |
// Value returns the value of the pair.
// It panics if the pair holds an empty key or value.
func (kv KVPair) Value() string {
	kv.assert()
	return kv.value
}
45 |
46 | // Sets the value.
47 | func (kv *KVPair) SetValue(v string) {
48 | kv.assert()
49 | kv.value = v
50 | kv.assert()
51 | }
52 |
53 | // Returns a copy of the KVPair.
54 | func (kv KVPair) Copy() KVPair {
55 | kv.assert()
56 | return NewKVPair(kv.key, kv.value)
57 | }
58 |
59 | // Returns raw KVPair in format "foo=bar".
60 | func (kv KVPair) String() string {
61 | kv.assert()
62 | return kv.key + "=" + kv.value
63 | }
64 |
65 | // Returns URL encoded KVPair in format "foo=bar".
66 | // Calls url.QueryEscape on both key and value.
67 | func (kv KVPair) QueryEscape() string {
68 | kv.assert()
69 | return url.QueryEscape(kv.key) + "=" + url.QueryEscape(kv.value)
70 | }
71 |
--------------------------------------------------------------------------------
/src/utils/morestrings/join.go:
--------------------------------------------------------------------------------
1 | package morestrings
2 |
3 | import (
4 | "strings"
5 | )
6 |
// JoinNonEmpty concatenates the non-empty elements of elems into a single string.
// The beg string is placed at the beginning, unless there are no non-empty elements,
// in which case the result is the empty string.
// The separator string sep is placed between elements in the resulting string.
func JoinNonEmpty(beg, sep string, elems ...string) string {
	kept := make([]string, 0, len(elems))
	for i := range elems {
		if elems[i] == "" {
			continue
		}
		kept = append(kept, elems[i])
	}

	switch len(kept) {
	case 0:
		return ""
	case 1:
		return beg + kept[0]
	default:
		return beg + strings.Join(kept, sep)
	}
}
26 |
--------------------------------------------------------------------------------
/src/utils/moretime/convert.go:
--------------------------------------------------------------------------------
1 | package moretime
2 |
3 | import (
4 | "strconv"
5 | "time"
6 |
7 | "github.com/rs/zerolog/log"
8 | )
9 |
10 | func handleAtoi(s string) int64 {
11 | i, err := strconv.Atoi(s)
12 | if err != nil {
13 | log.Panic().
14 | Caller().
15 | Err(err).
16 | Msg("Failed converting string to int")
17 | // ^PANIC
18 | }
19 | return int64(i)
20 | }
21 |
22 | func convertToDurationWithoutLastChar(s string) time.Duration {
23 | return time.Duration(handleAtoi(s[:len(s)-1]))
24 | }
25 |
26 | /*
27 | Converts the following to time.Duration:
28 |
29 | "1y" -> 1 year,
30 | "2M" -> 2 months,
31 | "3w" -> 3 weeks,
32 | "4d" -> 4 days,
33 | "5h" -> 5 hours,
34 | "6m" -> 6 minutes,
35 | "7s" -> 7 seconds,
36 | "8"-> 8 milliseconds
37 | */
38 | func ConvertFromFancyTime(fancy string) time.Duration {
39 | switch fancy[len(fancy)-1] {
40 | case 'y':
41 | return convertToDurationWithoutLastChar(fancy) * Year
42 | case 'M':
43 | return convertToDurationWithoutLastChar(fancy) * Month
44 | case 'w':
45 | return convertToDurationWithoutLastChar(fancy) * Week
46 | case 'd':
47 | return convertToDurationWithoutLastChar(fancy) * Day
48 | case 'h':
49 | return convertToDurationWithoutLastChar(fancy) * time.Hour
50 | case 'm':
51 | return convertToDurationWithoutLastChar(fancy) * time.Minute
52 | case 's':
53 | return convertToDurationWithoutLastChar(fancy) * time.Second
54 | default:
55 | return time.Duration(handleAtoi(fancy)) * time.Millisecond
56 | }
57 | }
58 |
// ConvertToFancyTime formats d as a whole number of milliseconds, i.e. the
// suffix-less form that ConvertFromFancyTime reads as milliseconds.
func ConvertToFancyTime(d time.Duration) string {
	// Format the int64 directly: going through int would truncate large
	// durations on platforms where int is 32 bits wide.
	return strconv.FormatInt(d.Milliseconds(), 10)
}
63 |
--------------------------------------------------------------------------------
/src/utils/moretime/types.go:
--------------------------------------------------------------------------------
1 | package moretime
2 |
3 | import (
4 | "time"
5 | )
6 |
// Durations longer than the units offered by the time package.
// Month and Year are fixed-length approximations (30 and 365 days).
const (
	Day      = 24 * time.Hour
	Week     = 7 * Day
	Month    = 30 * Day
	Quarter  = 3 * Month
	HalfYear = 6 * Month
	Year     = 365 * Day
)
13 |
--------------------------------------------------------------------------------
/src/utils/moreurls/build.go:
--------------------------------------------------------------------------------
1 | package moreurls
2 |
3 | import (
4 | "net/url"
5 |
6 | "github.com/rs/zerolog/log"
7 | )
8 |
9 | // Constructs a URL with the given parameters.
10 | func Build(urll string, params Params) string {
11 | // Parse the URL.
12 | u, err := url.Parse(urll)
13 | if err != nil {
14 | log.Panic().
15 | Err(err).
16 | Str("url", urll).
17 | Msg("Failed to parse the URL")
18 | // ^PANIC - Assert correct URL
19 | }
20 |
21 | // Convert the parameters to encoded RawQuery keeping the order of keys.
22 | u.RawQuery = params.QueryEscape()
23 |
24 | return u.String()
25 | }
26 |
--------------------------------------------------------------------------------
/src/utils/moreurls/fqdn.go:
--------------------------------------------------------------------------------
1 | package moreurls
2 |
3 | import (
4 | "net/url"
5 |
6 | "github.com/rs/zerolog/log"
7 | )
8 |
9 | // Returns the fully qualified domain name of the URL.
10 | func FQDN(urll string) string {
11 | // Check if the url is empty.
12 | if urll == "" {
13 | log.Panic().
14 | Str("url", urll).
15 | Msg("URL is empty")
16 | }
17 |
18 | // Parse the URL.
19 | u, err := url.Parse(urll)
20 | if err != nil {
21 | log.Panic().
22 | Err(err).
23 | Str("url", urll).
24 | Msg("Failed to parse the URL")
25 | // ^PANIC - Assert correct URL.
26 | }
27 |
28 | // Check if the hostname is empty.
29 | h := u.Hostname()
30 | if h == "" {
31 | log.Panic().
32 | Str("url", urll).
33 | Msg("Hostname is empty")
34 | // ^PANIC - Assert non-empty URL.
35 | }
36 |
37 | return h
38 | }
39 |
--------------------------------------------------------------------------------