├── .air.toml ├── .dockerignore ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── ci_codeql.yml │ ├── ci_main.yml │ ├── ci_test.yml │ └── release.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── docker-compose.yaml ├── docker └── Dockerfile ├── docs ├── example_category.json └── hearchco.service ├── generate ├── enginer │ ├── enginer.go │ ├── structs.go │ └── util.go └── exchanger │ ├── exchanger.go │ ├── structs.go │ └── util.go ├── go.mod ├── go.sum ├── goreleaser ├── develop.yml └── release.yml ├── hearchco_example.yaml ├── scripts ├── test-dynamodb-docker.sh ├── test-dynamodb-podman.sh ├── test-dynamodb.sh ├── test-engines.sh ├── test-redis-docker.sh ├── test-redis-podman.sh ├── test-redis.sh └── test.sh └── src ├── cache ├── actions_currencies.go ├── db.go ├── driver.go ├── dynamodb │ ├── dynamodb.go │ └── dynamodb_test.go ├── nocache │ ├── nocache.go │ └── nocache_test.go └── redis │ ├── redis.go │ └── redis_test.go ├── cli ├── flags.go ├── setup.go └── version.go ├── config ├── defaults.go ├── load.go ├── structs_config.go ├── structs_engines.go ├── structs_exchange.go └── structs_server.go ├── exchange ├── currency │ ├── currency.go │ └── map.go ├── engines │ ├── currencyapi │ │ ├── exchange.go │ │ ├── info.go │ │ ├── json.go │ │ ├── new.go │ │ └── note.md │ ├── exchanger.go │ ├── exchangerateapi │ │ ├── exchange.go │ │ ├── info.go │ │ ├── json.go │ │ └── new.go │ ├── frankfurter │ │ ├── exchange.go │ │ ├── info.go │ │ ├── json.go │ │ └── new.go │ └── name.go └── exchange.go ├── logger └── setup.go ├── main.go ├── profiler └── run.go ├── router ├── lambda.go ├── middlewares │ ├── compress.go │ ├── logging.go │ └── setup.go ├── router.go └── routes │ ├── params.go │ ├── responses.go │ ├── route_currencies.go │ ├── route_exchange.go │ ├── route_image_proxy.go │ ├── route_search_images.go │ ├── route_search_suggestions.go │ ├── route_search_web.go │ 
├── setup.go │ └── writers.go ├── search ├── category │ ├── convert.go │ ├── disabled.go │ ├── json.go │ └── type.go ├── context_cancel.go ├── engines │ ├── _engines_test │ │ ├── s_images.go │ │ ├── s_suggestions.go │ │ ├── s_web.go │ │ └── structs.go │ ├── bing │ │ ├── bing.md │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── json.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_images.go │ │ ├── s_images_test.go │ │ ├── s_web.go │ │ ├── s_web_test.go │ │ └── telemetry.go │ ├── brave │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── duckduckgo │ │ ├── ddg.md │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_suggestions.go │ │ ├── s_suggestions_test.go │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── etools │ │ ├── dompaths.go │ │ ├── etools.md │ │ ├── info.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── google │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── json.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_images.go │ │ ├── s_images_test.go │ │ ├── s_suggestions.go │ │ ├── s_suggestions_test.go │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── googlescholar │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_web.go │ │ ├── s_web_test.go │ │ └── telemetry.go │ ├── mojeek │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── name.go │ ├── options │ │ ├── locale.go │ │ └── structs.go │ ├── presearch │ │ ├── info.go │ │ ├── json.go │ │ ├── new.go │ │ ├── params.go │ │ ├── presearch.md │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── qwant │ │ ├── info.go │ │ ├── json.go │ │ ├── new.go │ │ ├── params.go │ │ ├── qwant.md │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── startpage │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_web.go │ │ ├── s_web_test.go │ │ └── startpage.md │ ├── swisscows │ │ ├── authenticator.go │ │ ├── info.go │ │ ├── json.go │ │ ├── 
new.go │ │ ├── params.go │ │ ├── s_web.go │ │ └── s_web_test.go │ ├── yahoo │ │ ├── dompaths.go │ │ ├── info.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_web.go │ │ ├── s_web_test.go │ │ └── telemetry.go │ └── yep │ │ ├── info.go │ │ ├── json.go │ │ ├── new.go │ │ ├── params.go │ │ ├── s_web.go │ │ └── s_web_test.go ├── groups.go ├── init.go ├── once.go ├── params.go ├── receiver.go ├── result │ ├── construct.go │ ├── interfaces.go │ ├── output.go │ ├── r_images.go │ ├── r_images_output.go │ ├── r_images_scraped.go │ ├── r_suggestion.go │ ├── r_suggestion_scraped.go │ ├── r_web.go │ ├── r_web_output.go │ ├── r_web_scraped.go │ ├── rank.go │ ├── rank │ │ ├── filler.go │ │ ├── filler_test.go │ │ ├── interfaces.go │ │ ├── results.go │ │ ├── score.go │ │ ├── sort.go │ │ ├── structs_test.go │ │ └── suggestions.go │ ├── rank_scraped.go │ ├── ranksimple.go │ ├── ranksimple_scraped.go │ ├── result_map.go │ ├── shorten.go │ ├── shorten_test.go │ └── suggestions_map.go ├── run_engine.go ├── run_engines.go ├── run_origins.go ├── s_images.go ├── s_suggestions.go ├── s_web.go ├── scraper │ ├── collector.go │ ├── dompaths.go │ ├── enginebase.go │ ├── interfaces.go │ ├── pagecontext.go │ ├── pagerankcounter.go │ ├── parse │ │ ├── fields.go │ │ └── parse.go │ ├── requests.go │ ├── scrape.go │ ├── suggest_resp.go │ └── timeout.go ├── searchtype │ └── name.go └── useragent │ └── useragent.go └── utils ├── anonymize ├── hash.go ├── hash_test.go ├── string.go ├── string_test.go └── structs_test.go ├── gotypelimits ├── ints.go └── uints.go ├── kvpair └── kvpair.go ├── morestrings └── join.go ├── moretime ├── convert.go └── types.go └── moreurls ├── build.go ├── fqdn.go └── params.go /.air.toml: -------------------------------------------------------------------------------- 1 | root = "." 
2 | testdata_dir = "testdata" 3 | tmp_dir = "tmp" 4 | 5 | [build] 6 | args_bin = [] 7 | bin = "./tmp/main" 8 | cmd = "CGO_ENABLED=0 go build -ldflags \"-s -w\" -trimpath -o ./tmp/main ./src" 9 | delay = 1000 10 | exclude_dir = ["*"] 11 | exclude_file = [] 12 | exclude_regex = ["_test.go"] 13 | exclude_unchanged = false 14 | follow_symlink = false 15 | full_bin = "" 16 | include_dir = ["src"] 17 | include_ext = ["go"] 18 | include_file = [] 19 | kill_delay = "0s" 20 | log = "build-errors.log" 21 | poll = false 22 | poll_interval = 0 23 | post_cmd = [] 24 | pre_cmd = [] 25 | rerun = false 26 | rerun_delay = 500 27 | send_interrupt = true 28 | stop_on_error = false 29 | 30 | [color] 31 | app = "" 32 | build = "yellow" 33 | main = "magenta" 34 | runner = "green" 35 | watcher = "cyan" 36 | 37 | [log] 38 | main_only = false 39 | time = false 40 | 41 | [misc] 42 | clean_on_exit = true 43 | 44 | [screen] 45 | clear_on_rebuild = true 46 | keep_scroll = true 47 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ** 2 | !docker/ 3 | !dist/ -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [hearchco, aleksasiriski] 2 | ko_fi: aleksasiriski 3 | liberapay: hearchco 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: "bug" 6 | assignees: "" 7 | --- 8 | 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **To Reproduce** 13 | Steps to reproduce the behavior: 14 | 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. 
Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **What's your setup? (please complete the following information):** 27 | 28 | - Using monolith or micro service setup?: [e.g. monolith] 29 | - Version of Hearchco [e.g. 1.2.3] 30 | - Method of installation [e.g. docker] 31 | 32 | **Docker compose file if used** 33 | 34 | ```docker 35 | Your compose goes here 36 | ``` 37 | 38 | **Hearchco config file** 39 | 40 | ```yaml 41 | Your config goes here 42 | ``` 43 | 44 | **Additional context** 45 | Add any other context about the problem here. 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Ask for adding a new feature 4 | title: "[FEAT]" 5 | labels: "enhancement" 6 | assignees: "" 7 | --- 8 | 9 | **Describe your feature request** 10 | A clear and concise description of what the feature request is. 11 | 12 | **Screenshots** 13 | If applicable, add screenshots to help explain your request. 14 | 15 | **Additional context** 16 | Add any other context about the problem here. 
17 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "docker" 8 | directory: "/" 9 | schedule: 10 | interval: "weekly" 11 | - package-ecosystem: "gomod" 12 | directory: "/" 13 | schedule: 14 | interval: "weekly" 15 | -------------------------------------------------------------------------------- /.github/workflows/ci_codeql.yml: -------------------------------------------------------------------------------- 1 | name: CodeQL CI 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | paths: 7 | - ".github/workflows/ci_codeql.yml" # this file 8 | - "go.mod" 9 | - "go.sum" 10 | - "Makefile" 11 | - "generate/**/*" 12 | - "src/**/*" 13 | pull_request: 14 | branches: ["*"] 15 | paths: 16 | - ".github/workflows/ci_codeql.yml" # this file 17 | - "go.mod" 18 | - "go.sum" 19 | - "Makefile" 20 | - "generate/**/*" 21 | - "src/**/*" 22 | schedule: 23 | - cron: "38 14 * * 3" 24 | 25 | jobs: 26 | analyze: 27 | name: Analyze 28 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 29 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 30 | permissions: 31 | security-events: write 32 | actions: read 33 | contents: read 34 | 35 | strategy: 36 | fail-fast: false 37 | matrix: 38 | language: ["go"] 39 | 40 | steps: 41 | - name: Checkout repository 42 | uses: actions/checkout@v4 43 | 44 | - name: Setup Go 45 | uses: actions/setup-go@v5 46 | with: 47 | go-version: stable 48 | 49 | - name: Initialize CodeQL 50 | uses: github/codeql-action/init@v3 51 | with: 52 | languages: ${{ matrix.language }} 53 | 54 | - name: Generate go code from go:generate comments 55 | run: make install 56 | 57 | - name: Build project 58 | run: make compile 59 | 60 | - name: Perform CodeQL 
Analysis 61 | uses: github/codeql-action/analyze@v3 62 | with: 63 | category: "/language:${{matrix.language}}" 64 | -------------------------------------------------------------------------------- /.github/workflows/ci_main.yml: -------------------------------------------------------------------------------- 1 | name: Main CI 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | paths: 7 | - ".github/workflows/ci_main.yml" # this file 8 | - "go.mod" 9 | - "go.sum" 10 | - "Makefile" 11 | - "goreleaser/develop.yml" 12 | - "generate/**/*" 13 | - "src/**/*" 14 | - "docker/**/*" 15 | - ".dockerignore" 16 | 17 | env: 18 | REGISTRY: ghcr.io 19 | IMAGE_NAME: ${{ github.repository }} 20 | 21 | jobs: 22 | goreleaser: 23 | runs-on: ubuntu-latest 24 | permissions: 25 | contents: read 26 | 27 | steps: 28 | - name: Checkout repository 29 | uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 0 32 | 33 | - name: Setup Go 34 | uses: actions/setup-go@v5 35 | with: 36 | go-version: stable 37 | 38 | - name: Setup QEMU 39 | uses: docker/setup-qemu-action@v3 40 | with: 41 | platforms: arm64,arm 42 | 43 | - name: Generate go code from go:generate comments 44 | run: make install 45 | 46 | - name: Snapshot release 47 | uses: goreleaser/goreleaser-action@v6 48 | with: 49 | version: "~> v2" 50 | args: release --snapshot --clean --config goreleaser/develop.yml 51 | 52 | test: 53 | runs-on: ubuntu-latest 54 | permissions: 55 | contents: read 56 | 57 | steps: 58 | - name: Checkout repository 59 | uses: actions/checkout@v4 60 | 61 | - name: Setup Go 62 | uses: actions/setup-go@v5 63 | with: 64 | go-version: stable 65 | 66 | - name: Generate go code from go:generate comments 67 | run: make install 68 | 69 | - name: Build project 70 | run: make compile 71 | 72 | - name: Test units 73 | run: make test 74 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | 
name: Release 2 | 3 | on: 4 | push: 5 | tags: ["v*.*.*"] 6 | 7 | env: 8 | REGISTRY: ghcr.io 9 | IMAGE_NAME: ${{ github.repository }} 10 | 11 | jobs: 12 | goreleaser: 13 | runs-on: ubuntu-latest 14 | permissions: 15 | contents: write 16 | packages: write 17 | issues: write 18 | 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v4 22 | with: 23 | fetch-depth: 0 24 | fetch-tags: true 25 | 26 | - name: Setup Go 27 | uses: actions/setup-go@v5 28 | with: 29 | go-version: stable 30 | 31 | - name: Setup QEMU 32 | uses: docker/setup-qemu-action@v3 33 | with: 34 | platforms: arm64,arm 35 | 36 | - name: Login to GitHub Container Registry 37 | uses: docker/login-action@v3 38 | with: 39 | registry: ${{ env.REGISTRY }} 40 | username: ${{ github.actor }} 41 | password: ${{ secrets.GITHUB_TOKEN }} 42 | 43 | - name: Generate go code from go:generate comments 44 | run: make install 45 | 46 | - name: Release 47 | uses: goreleaser/goreleaser-action@v6 48 | with: 49 | version: "~> v2" 50 | args: release --clean --config goreleaser/release.yml 51 | env: 52 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 53 | 54 | - name: Artifact Linux 55 | uses: actions/upload-artifact@v4 56 | with: 57 | name: build_linux 58 | path: dist/*linux* 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | *.wasm 11 | bin/ 12 | 13 | # Test binary, built with `go test -c` 14 | *.test 15 | 16 | # Output of the go coverage tool, specifically when used with LiteIDE 17 | *.out 18 | 19 | # Dependency directories (remove the comment below to include it) 20 | # vendor/ 21 | 22 | # Go 
workspace file 23 | go.work 24 | go.work.sum 25 | 26 | hearchco.* 27 | .vscode/* 28 | test.go 29 | 30 | src/search/engines/*/site/* 31 | !src/search/engines/_engines_test 32 | !src/search/engines/_sedefaults 33 | 34 | log/ 35 | database/ 36 | profiling/ 37 | 38 | # go generate 39 | *_stringer.go 40 | *_enumer.go 41 | *_enginer.go 42 | *_exchanger.go 43 | 44 | # test dump 45 | testdump* 46 | tmp/ 47 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | air -- --pretty 3 | 4 | debug: 5 | air -- --pretty -v 6 | 7 | trace: 8 | air -- --pretty -vv 9 | 10 | install: 11 | go get ./... 12 | go install github.com/dmarkham/enumer@latest 13 | go generate ./... 14 | go install github.com/air-verse/air@latest 15 | 16 | compile: 17 | CGO_ENABLED=0 go build -ldflags "-s -w" -trimpath ./src/... 18 | compile-linux: 19 | CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -trimpath -o bin/hearchco ./src 20 | compile-macos: 21 | CGO_ENABLED=0 GOOS=darwin go build -ldflags "-s -w" -trimpath -o bin/hearchco ./src 22 | compile-windows: 23 | CGO_ENABLED=0 GOOS=windows go build -ldflags "-s -w" -trimpath -o bin/hearchco.exe ./src 24 | 25 | test: 26 | sh ./scripts/test.sh 27 | test-engines: 28 | sh ./scripts/test-engines.sh 29 | 30 | test-redis: 31 | sh ./scripts/test-redis.sh 32 | test-redis-podman: 33 | sh ./scripts/test-redis-podman.sh 34 | test-redis-docker: 35 | sh ./scripts/test-redis-docker.sh 36 | 37 | test-dynamodb: 38 | sh ./scripts/test-dynamodb.sh 39 | test-dynamodb-podman: 40 | sh ./scripts/test-dynamodb-podman.sh 41 | test-dynamodb-docker: 42 | sh ./scripts/test-dynamodb-docker.sh 43 | 44 | test-all: test test-redis test-dynamodb test-engines 45 | test-all-podman: test test-redis-podman test-dynamodb-podman test-engines 46 | test-all-docker: test test-redis-docker test-dynamodb-docker test-engines 47 | 48 | update: 49 | go get -u ./... 
50 | go mod tidy 51 | 52 | lint: 53 | golangci-lint run 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hearchco agent repository built using Go 2 | 3 | ## Installation 4 | 5 | ### Docker 6 | [https://github.com/hearchco/agent/pkgs/container/agent](https://github.com/hearchco/agent/pkgs/container/agent) 7 | 8 | ```bash 9 | docker pull ghcr.io/hearchco/agent 10 | ``` 11 | 12 | ### Binary 13 |
14 | Binary file - Linux 15 | 16 | Download the latest release from the [releases page](https://github.com/hearchco/agent/releases) manually, or automatically like below and set the permissions for the files. 17 | 18 | ```bash 19 | # Replace the 'match' part with your own ARCH 20 | curl -L -o /opt/hearchco <<< echo $(curl -sL https://api.github.com/repos/hearchco/agent/releases/latest | jq -r '.assets[] | select(.name? | match("linux_amd64$")) | .browser_download_url') 21 | ``` 22 | 23 | ### Create a user and modify the rights. 24 | 25 | ```bash 26 | sudo useradd --shell /bin/bash --system --user-group hearchco 27 | sudo chown hearchco:hearchco /opt/hearchco 28 | ``` 29 | 30 | ## Start/Stop/Status 31 | 32 | ### Create a Systemd Unit 33 | 34 | Save the example systemd unit file ([docs/hearchco.service](docs/hearchco.service)) to `/etc/systemd/system/hearchco.service`. 35 | 36 | ### Start the hearchco Service 37 | 38 | Reload the service daemon, start the newly created service, and check its status. 39 | 40 | ```bash 41 | sudo systemctl daemon-reload 42 | sudo systemctl start hearchco 43 | sudo systemctl status hearchco 44 | ``` 45 | 46 | ### Debug 47 | 48 | ```bash 49 | sudo journalctl -u hearchco -b --reverse 50 | ``` 51 | 52 | ### Start hearchco on Startup 53 | 54 | ```bash 55 | sudo systemctl enable hearchco.service 56 | ``` 57 | 58 |
59 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | We love responsible reports of (potential) security issues in Hearchco. 4 | 5 | You can contact us at [security@hearch.co](mailto:security@hearch.co). 6 | 7 | Be sure to provide as much information as possible and, if available, 8 | the steps to reproduce the identified vulnerability. Also 9 | include the specific URL of the project and the code in which you found 10 | the issue in your report. 11 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | 3 | COPY --from=alpine /etc/ssl/certs/ /etc/ssl/certs/ 4 | 5 | COPY bootstrap ./hearchco 6 | 7 | # "nobody" user 8 | USER 65534 9 | 10 | ENTRYPOINT ["./hearchco"] 11 | 12 | EXPOSE 8000 13 | 14 | LABEL org.opencontainers.image.source="https://github.com/hearchco/agent" 15 | -------------------------------------------------------------------------------- /docs/example_category.json: -------------------------------------------------------------------------------- 1 | { 2 | "engines": { 3 | "google": { 4 | "enabled": true, 5 | "required": false, 6 | "requiredbyorigin": true, 7 | "preferred": false, 8 | "preferredbyorigin": false 9 | }, 10 | "bing": { 11 | "enabled": true, 12 | "required": false, 13 | "requiredbyorigin": true, 14 | "preferred": false, 15 | "preferredbyorigin": false 16 | }, 17 | "brave": { 18 | "enabled": true, 19 | "required": false, 20 | "requiredbyorigin": false, 21 | "preferred": true, 22 | "preferredbyorigin": false 23 | } 24 | }, 25 | "ranking": { 26 | "rankexp": 0.5, 27 | "rankmul": 1, 28 | "rankconst": 0, 29 | "rankscoremul": 1, 30 | "rankscoreadd": 0, 31 | "timesreturnedmul": 1, 32 | "timesreturnedadd": 0, 33 | "timesreturnedscoremul": 1, 34 |
"timesreturnedscoreadd": 0, 35 | "engines": { 36 | "google": { 37 | "mul": 1, 38 | "add": 0 39 | }, 40 | "bing": { 41 | "mul": 1, 42 | "add": 0 43 | }, 44 | "brave": { 45 | "mul": 1, 46 | "add": 0 47 | } 48 | } 49 | }, 50 | "timings": { 51 | "preferredtimeout": "500", 52 | "hardtimeout": "1500" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /docs/hearchco.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Agent for Hearchco Metasearch engine built using Go 3 | After=network.target 4 | 5 | [Install] 6 | WantedBy=multi-user.target 7 | 8 | [Service] 9 | # Find & Replace User/Group/Path with your own 10 | User=hearchco 11 | Group=hearchco 12 | Type=simple 13 | ExecStart=/opt/hearchco --verbosity --config-path="hearchco.yaml" 14 | WorkingDirectory=/opt/hearchco 15 | TimeoutStopSec=20 16 | KillMode=process 17 | Restart=on-failure 18 | 19 | # See https://www.freedesktop.org/software/systemd/man/systemd.exec.html 20 | DevicePolicy=closed 21 | NoNewPrivileges=yes 22 | PrivateTmp=yes 23 | #PrivateUsers=yes 24 | ProtectControlGroups=yes 25 | ProtectKernelModules=yes 26 | ProtectKernelTunables=yes 27 | RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 28 | RestrictNamespaces=yes 29 | RestrictRealtime=yes 30 | SystemCallFilter=~@clock @debug @module @mount @obsolete @reboot @setuid @swap 31 | #ReadWritePaths=/opt/hearchco 32 | 33 | # Prevent from accessing any real (physical) devices 34 | PrivateDevices=yes 35 | 36 | # You can change the following line to `strict` instead of `full` if you don't 37 | # want it to be able to write anything on your filesystem outside of $ReadWritePaths. 38 | ProtectSystem=strict 39 | 40 | # You can uncomment the following line if you don't have any media in /home/*. 41 | # This will prevent hearchco from ever reading/writing anything there. 
42 | ProtectHome=true 43 | -------------------------------------------------------------------------------- /generate/enginer/structs.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "go/ast" 6 | "go/types" 7 | ) 8 | 9 | // Value represents a declared constant. 10 | type Value struct { 11 | originalName string // The name of the constant. 12 | name string // The name with trimmed prefix. 13 | // The value is stored as a bit pattern alone. The boolean tells us 14 | // whether to interpret it as an int64 or a uint64; the only place 15 | // this matters is when sorting. 16 | // Much of the time the str field is all we need; it is printed 17 | // by Value.String. 18 | value uint64 // Will be converted to int64 when needed. 19 | signed bool // Whether the constant is a signed type. 20 | str string // The string representation given by the "go/constant" package. 21 | interfaces []string // The interfaces that the constant implements. 22 | } 23 | 24 | // Generator holds the state of the analysis. Primarily used to buffer 25 | // the output for format.Source. 26 | type Generator struct { 27 | buf bytes.Buffer // Accumulated output. 28 | pkg *Package // Package we are scanning. 29 | 30 | trimPrefix string 31 | 32 | logf func(format string, args ...interface{}) // test logging hook; nil when not testing 33 | } 34 | 35 | // File holds a single parsed file and associated data. 36 | type File struct { 37 | pkg *Package // Package to which this file belongs. 38 | file *ast.File // Parsed AST. 39 | // These fields are reset for each type being generated. 40 | typeName string // Name of the constant type. 41 | values []Value // Accumulator for constant values of that type. 
42 | 43 | trimPrefix string 44 | } 45 | 46 | type Package struct { 47 | name string 48 | defs map[*ast.Ident]types.Object 49 | files []*File 50 | } 51 | -------------------------------------------------------------------------------- /generate/enginer/util.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "slices" 7 | "strings" 8 | ) 9 | 10 | func validConst(v Value) bool { 11 | lowerName := strings.ToLower(v.name) 12 | return lowerName != "undefined" && isDirectory(lowerName) 13 | } 14 | 15 | func validInterfacer(v Value, interfaceName string) bool { 16 | return slices.Contains(v.interfaces, strings.ToLower(interfaceName)) 17 | } 18 | 19 | // isDirectory reports whether the named file is a directory. 20 | func isDirectory(path string) bool { 21 | info, err := os.Stat(path) 22 | if err != nil { 23 | return false 24 | } 25 | return info.IsDir() 26 | } 27 | 28 | func isDirectoryFatal(path string) bool { 29 | info, err := os.Stat(path) 30 | if err != nil { 31 | log.Fatal(err) 32 | // ^FATAL 33 | } 34 | return info.IsDir() 35 | } 36 | -------------------------------------------------------------------------------- /generate/exchanger/structs.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "go/ast" 6 | "go/types" 7 | ) 8 | 9 | // Value represents a declared constant. 10 | type Value struct { 11 | originalName string // The name of the constant. 12 | name string // The name with trimmed prefix. 13 | // The value is stored as a bit pattern alone. The boolean tells us 14 | // whether to interpret it as an int64 or a uint64; the only place 15 | // this matters is when sorting. 16 | // Much of the time the str field is all we need; it is printed 17 | // by Value.String. 18 | value uint64 // Will be converted to int64 when needed. 19 | signed bool // Whether the constant is a signed type. 
20 | str string // The string representation given by the "go/constant" package. 21 | interfaces []string // The interfaces that the constant implements. 22 | } 23 | 24 | // Generator holds the state of the analysis. Primarily used to buffer 25 | // the output for format.Source. 26 | type Generator struct { 27 | buf bytes.Buffer // Accumulated output. 28 | pkg *Package // Package we are scanning. 29 | 30 | trimPrefix string 31 | 32 | logf func(format string, args ...interface{}) // test logging hook; nil when not testing 33 | } 34 | 35 | // File holds a single parsed file and associated data. 36 | type File struct { 37 | pkg *Package // Package to which this file belongs. 38 | file *ast.File // Parsed AST. 39 | // These fields are reset for each type being generated. 40 | typeName string // Name of the constant type. 41 | values []Value // Accumulator for constant values of that type. 42 | 43 | trimPrefix string 44 | } 45 | 46 | type Package struct { 47 | name string 48 | defs map[*ast.Ident]types.Object 49 | files []*File 50 | } 51 | -------------------------------------------------------------------------------- /generate/exchanger/util.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "strings" 7 | ) 8 | 9 | func validConst(v Value) bool { 10 | lowerName := strings.ToLower(v.name) 11 | return lowerName != "undefined" && isDirectory(lowerName) 12 | } 13 | 14 | // isDirectory reports whether the named file is a directory. 
15 | func isDirectory(path string) bool { 16 | info, err := os.Stat(path) 17 | if err != nil { 18 | return false 19 | } 20 | return info.IsDir() 21 | } 22 | 23 | func isDirectoryFatal(path string) bool { 24 | info, err := os.Stat(path) 25 | if err != nil { 26 | log.Fatal(err) 27 | // ^FATAL 28 | } 29 | return info.IsDir() 30 | } 31 | -------------------------------------------------------------------------------- /goreleaser/develop.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # .goreleaser.yml 4 | project_name: hearchco 5 | 6 | # Build 7 | builds: 8 | - env: [CGO_ENABLED=0] 9 | goos: 10 | - linux 11 | main: ./src 12 | binary: bootstrap 13 | goarch: 14 | - amd64 15 | - arm64 16 | - arm 17 | goarm: 18 | - 7 19 | ldflags: 20 | - -s -w 21 | - -X "main.Version=v{{ .Version }}" 22 | - -X "main.GitCommit={{ .ShortCommit }}" 23 | - -X "main.Timestamp={{ .Timestamp }}" 24 | flags: 25 | - -trimpath 26 | 27 | # MacOS Universal Binaries 28 | universal_binaries: 29 | - replace: true 30 | 31 | # Archive 32 | archives: 33 | - name_template: "{{ .ProjectName }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" 34 | format: "binary" 35 | 36 | # Checksum 37 | checksum: 38 | name_template: "checksums.txt" 39 | algorithm: sha512 40 | 41 | # Changelog 42 | changelog: 43 | filters: 44 | exclude: 45 | - "^chore:" 46 | - "^ci:" 47 | - "^docs:" 48 | - "^test:" 49 | - "^merge:" 50 | - "^Merge branch" 51 | 52 | # Docker 53 | dockers: 54 | - image_templates: 55 | - "{{ .Env.REGISTRY }}/{{ .Env.IMAGE_NAME }}:v{{ .Version }}-amd64" 56 | use: buildx 57 | goarch: amd64 58 | dockerfile: ./docker/Dockerfile 59 | build_flag_templates: 60 | - --platform=linux/amd64 61 | extra_files: 62 | - docker 63 | 64 | - image_templates: 65 | - "{{ .Env.REGISTRY }}/{{ .Env.IMAGE_NAME }}:v{{ .Version }}-arm64" 66 | use: buildx 67 | goarch: arm64 68 | dockerfile: ./docker/Dockerfile 69 | build_flag_templates: 70 | - 
--platform=linux/arm64 71 | extra_files: 72 | - docker 73 | 74 | - image_templates: 75 | - "{{ .Env.REGISTRY }}/{{ .Env.IMAGE_NAME }}:v{{ .Version }}-armv7" 76 | use: buildx 77 | goarch: arm 78 | goarm: 7 79 | dockerfile: ./docker/Dockerfile 80 | build_flag_templates: 81 | - --platform=linux/arm/v7 82 | extra_files: 83 | - docker 84 | -------------------------------------------------------------------------------- /hearchco_example.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | frontendurls: http://localhost:5173,https://*hearch.co 3 | cache: 4 | type: none 5 | imageproxy: 6 | secretkey: changemepls 7 | # engines: 8 | # google: 9 | # noweb: true # Disables web search for this engine 10 | # noimages: true # Disables image search for this engine 11 | # nosuggestions: true # Disables suggestions for this engine 12 | -------------------------------------------------------------------------------- /scripts/test-dynamodb-docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export AWS_REGION=hearchco-test-1 4 | export AWS_ACCESS_KEY_ID=hearchco 5 | export AWS_SECRET_ACCESS_KEY=hearchco 6 | export DYNAMODB_TABLE=hearchco_test 7 | export DYNAMODB_ENDPOINT=http://localhost:8000 8 | 9 | docker run --rm --name hearchco-dynamodb -d -p 8000:8000 docker.io/amazon/dynamodb-local && \ 10 | sleep 5 && \ 11 | aws dynamodb create-table \ 12 | --table-name $DYNAMODB_TABLE \ 13 | --attribute-definitions AttributeName=Key,AttributeType=S \ 14 | --key-schema AttributeName=Key,KeyType=HASH \ 15 | --billing-mode PAY_PER_REQUEST \ 16 | --endpoint-url $DYNAMODB_ENDPOINT && \ 17 | aws dynamodb update-time-to-live \ 18 | --table-name $DYNAMODB_TABLE \ 19 | --time-to-live-specification "Enabled=true, AttributeName=TTL" \ 20 | --endpoint-url $DYNAMODB_ENDPOINT && \ 21 | go test $(go list ./... 
#!/usr/bin/env bash
# Runs the DynamoDB cache tests against a throwaway dynamodb-local container
# started with podman, then stops the container.
# The script exits with the status of the setup/test chain, so a failing
# `go test` is no longer masked by a successful `podman stop`.

export AWS_REGION=hearchco-test-1
export AWS_ACCESS_KEY_ID=hearchco
export AWS_SECRET_ACCESS_KEY=hearchco
export DYNAMODB_TABLE=hearchco_test
export DYNAMODB_ENDPOINT=http://localhost:8000

podman run --rm --name hearchco-dynamodb -d -p 8000:8000 docker.io/amazon/dynamodb-local && \
  sleep 5 && \
  aws dynamodb create-table \
    --table-name "$DYNAMODB_TABLE" \
    --attribute-definitions AttributeName=Key,AttributeType=S \
    --key-schema AttributeName=Key,KeyType=HASH \
    --billing-mode PAY_PER_REQUEST \
    --endpoint-url "$DYNAMODB_ENDPOINT" && \
  aws dynamodb update-time-to-live \
    --table-name "$DYNAMODB_TABLE" \
    --time-to-live-specification "Enabled=true, AttributeName=TTL" \
    --endpoint-url "$DYNAMODB_ENDPOINT" && \
  go test $(go list ./... | grep /dynamodb) -count=1
# Remember the outcome of the chain above before running the cleanup.
status=$?

# Stop the container regardless of the outcome (it was started with --rm).
podman stop hearchco-dynamodb

exit "$status"
#!/usr/bin/env bash
# Runs the Redis cache tests against a throwaway Redis container started with
# docker, then stops the container.
# The script exits with the status of the run/test chain, so a failing
# `go test` is no longer masked by a successful `docker stop`.

docker run --rm --name hearchco-redis -d -p 6379:6379 docker.io/library/redis && \
  go test $(go list ./... | grep /redis) -count=1
# Remember the outcome of the chain above before running the cleanup.
status=$?

# Stop the container regardless of the outcome (it was started with --rm).
docker stop hearchco-redis

exit "$status"
14 | } 15 | 16 | func (db DB) GetCurrencies(base currency.Currency, engs []engines.Name) (currency.Currencies, error) { 17 | key := combineBaseWithExchangeEnginesNames(base, engs) 18 | var currencies currency.Currencies 19 | err := db.driver.Get(key, ¤cies) 20 | return currencies, err 21 | } 22 | 23 | func (db DB) GetCurrenciesTTL(base currency.Currency, engs []engines.Name) (time.Duration, error) { 24 | key := combineBaseWithExchangeEnginesNames(base, engs) 25 | return db.driver.GetTTL(key) 26 | } 27 | 28 | func combineBaseWithExchangeEnginesNames(base currency.Currency, engs []engines.Name) string { 29 | return fmt.Sprintf("%v_%v", base.String(), combineExchangeEnginesNames(engs)) 30 | } 31 | 32 | func combineExchangeEnginesNames(engs []engines.Name) string { 33 | var key string 34 | for i, eng := range engs { 35 | if i == 0 { 36 | key = fmt.Sprintf("%v", eng.String()) 37 | } else { 38 | key = fmt.Sprintf("%v_%v", key, eng.String()) 39 | } 40 | } 41 | return key 42 | } 43 | -------------------------------------------------------------------------------- /src/cache/db.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/rs/zerolog/log" 8 | 9 | "github.com/hearchco/agent/src/cache/dynamodb" 10 | "github.com/hearchco/agent/src/cache/nocache" 11 | "github.com/hearchco/agent/src/cache/redis" 12 | "github.com/hearchco/agent/src/config" 13 | ) 14 | 15 | type DB struct { 16 | driver Driver 17 | } 18 | 19 | func New(ctx context.Context, cacheConf config.Cache) (DB, error) { 20 | var drv Driver 21 | var err error 22 | 23 | switch cacheConf.Type { 24 | case "redis": 25 | drv, err = redis.New(ctx, cacheConf.KeyPrefix, cacheConf.Redis) 26 | if err != nil { 27 | err = fmt.Errorf("failed creating a redis cache: %w", err) 28 | } 29 | case "dynamodb": 30 | drv, err = dynamodb.New(ctx, cacheConf.KeyPrefix, cacheConf.DynamoDB) 31 | if err != nil { 32 | err = 
// DRV is a cache driver that stores nothing: every operation succeeds
// immediately and has no effect.
type DRV struct{}

// New returns a ready-to-use no-op driver; the error is always nil.
func New() (DRV, error) {
	var drv DRV
	return drv, nil
}

// Close does nothing.
func (drv DRV) Close() {
}

// Set ignores the key, the value and the optional TTL and reports success.
func (drv DRV) Set(k string, v any, ttl ...time.Duration) error {
	return nil
}

// Get leaves o untouched and reports success.
func (drv DRV) Get(k string, o any) error {
	return nil
}

// GetTTL always reports a zero duration and no error.
func (drv DRV) GetTTL(k string) (time.Duration, error) {
	var none time.Duration
	return none, nil
}
:= New() 25 | if err != nil { 26 | t.Errorf("error creating nocache: %v", err) 27 | } 28 | 29 | defer db.Close() 30 | 31 | err = db.Set("testkey", "testvalue") 32 | if err != nil { 33 | t.Errorf("error setting key-value pair: %v", err) 34 | } 35 | } 36 | 37 | func TestSetTTL(t *testing.T) { 38 | db, err := New() 39 | if err != nil { 40 | t.Errorf("error creating nocache: %v", err) 41 | } 42 | 43 | defer db.Close() 44 | 45 | err = db.Set("testkey", "testvalue", 1) 46 | if err != nil { 47 | t.Errorf("error setting key-value pair with TTL: %v", err) 48 | } 49 | } 50 | 51 | func TestGet(t *testing.T) { 52 | db, err := New() 53 | if err != nil { 54 | t.Errorf("error creating nocache: %v", err) 55 | } 56 | 57 | defer db.Close() 58 | 59 | err = db.Set("testkey", "testvalue") 60 | if err != nil { 61 | t.Errorf("error setting key-value pair: %v", err) 62 | } 63 | 64 | var value string = "testvalue" 65 | err = db.Get("testkey", &value) 66 | if err != nil { 67 | t.Errorf("error getting value: %v", err) 68 | } 69 | 70 | if value != "testvalue" { 71 | t.Errorf("expected value: testvalue, got: %v", value) 72 | } 73 | } 74 | 75 | func TestGetTTL(t *testing.T) { 76 | db, err := New() 77 | if err != nil { 78 | t.Errorf("error creating nocache: %v", err) 79 | } 80 | 81 | defer db.Close() 82 | 83 | err = db.Set("testkey", "testvalue", 1) 84 | if err != nil { 85 | t.Errorf("error setting key-value pair with TTL: %v", err) 86 | } 87 | 88 | ttl, err := db.GetTTL("testkey") 89 | if err != nil { 90 | t.Errorf("error getting TTL: %v", err) 91 | } 92 | 93 | if ttl != 0 { 94 | t.Errorf("expected TTL: 0, got: %v", ttl) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/cli/flags.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | type Flags struct { 4 | Version versionFlag `name:"version" help:"Print version information and quit"` 5 | Pretty bool `type:"bool" default:"false" 
// Profiler groups the command-line switches that control profiling; it is
// embedded in Flags. Every switch is a boolean flag that defaults to false.
// ProfilerServe is handled separately from the other switches: profiler.Run
// only looks at the profile kinds below it and aborts when more than one of
// them is enabled.
type Profiler struct {
	ProfilerServe  bool `type:"bool" default:"false" help:"Run the profiler and serve at /debug/pprof/ http endpoint"`
	ProfilerCPU    bool `type:"bool" default:"false" help:"Use cpu profiling"`
	ProfilerHeap   bool `type:"bool" default:"false" help:"Use heap profiling"`
	ProfilerGOR    bool `type:"bool" default:"false" help:"Use goroutine profiling"`
	ProfilerThread bool `type:"bool" default:"false" help:"Use threadcreate profiling"`
	ProfilerAlloc  bool `type:"bool" default:"false" help:"Use alloc profiling"`
	ProfilerBlock  bool `type:"bool" default:"false" help:"Use block profiling"`
	ProfilerMutex  bool `type:"bool" default:"false" help:"Use mutex profiling"`
	ProfilerClock  bool `type:"bool" default:"false" help:"Use clock profiling"`
	ProfilerTrace  bool `type:"bool" default:"false" help:"Use trace profiling"`
}
// version holds the build metadata that is rendered into the version string.
type version struct {
	ver       string
	timestamp string
	commit    string
}

// String renders the metadata as "<ver> (<commit>@<timestamp>)".
// When no version was set it returns "dev", whatever the other fields hold.
func (v version) String() string {
	if v.ver != "" {
		return fmt.Sprintf("%v (%v@%v)", v.ver, v.commit, v.timestamp)
	}
	return "dev"
}
// ReaderConfig is the format in which the config is read from the config file and environment variables.
// The koanf tags give the key each field is read from: "server", "engines" and "exchange".
type ReaderConfig struct {
	Server    ReaderServer                  `koanf:"server"`
	REngines  map[string]ReaderEngineConfig `koanf:"engines"` // keyed by a string, presumably the engine name (confirm against the loader)
	RExchange ReaderExchange                `koanf:"exchange"`
}
// ReaderExchange is the format in which the exchange config is read from the config file and environment variables.
type ReaderExchange struct {
	BaseCurrency string                          `koanf:"basecurrency"` // base currency as a plain string
	REngines     map[string]ReaderExchangeEngine `koanf:"engines"`      // keyed by a string, presumably the exchange engine name (confirm against the loader)
	RTimings     ReaderExchangeTimings           `koanf:"timings"`      // timings in their string form
}
32 | type ReaderExchangeTimings struct { 33 | // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond). 34 | HardTimeout string `koanf:"hardtimeout"` 35 | } 36 | type ExchangeTimings struct { 37 | // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond). 38 | HardTimeout time.Duration 39 | } 40 | -------------------------------------------------------------------------------- /src/exchange/currency/currency.go: -------------------------------------------------------------------------------- 1 | package currency 2 | 3 | import ( 4 | "fmt" 5 | "slices" 6 | "strings" 7 | 8 | "github.com/rs/zerolog/log" 9 | ) 10 | 11 | // Format: ISO 4217 (3-letter code) e.g. CHF, EUR, GBP, USD. 12 | type Currency string 13 | 14 | func (c Currency) String() string { 15 | return string(c) 16 | } 17 | 18 | func (c Currency) Lower() string { 19 | return strings.ToLower(c.String()) 20 | } 21 | 22 | func Convert(curr string) (Currency, error) { 23 | if len(curr) != 3 { 24 | return "", fmt.Errorf("currency code must be 3 characters long") 25 | } 26 | 27 | upperCurr := strings.ToUpper(curr) 28 | return Currency(upperCurr), nil 29 | } 30 | 31 | func ConvertBase(curr string) Currency { 32 | // Hardcoded to ensure all APIs include these currencies and therefore work as expected. 33 | supportedBaseCurrencies := [...]string{"CHF", "EUR", "GBP", "USD"} 34 | 35 | upperCurr := strings.ToUpper(curr) 36 | if !slices.Contains(supportedBaseCurrencies[:], upperCurr) { 37 | log.Panic(). 38 | Str("currency", upperCurr). 
39 | Msg("unsupported base currency") 40 | // ^PANIC 41 | } 42 | 43 | return Currency(upperCurr) 44 | } 45 | -------------------------------------------------------------------------------- /src/exchange/currency/map.go: -------------------------------------------------------------------------------- 1 | package currency 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | type Currencies map[Currency]float64 8 | 9 | type CurrencyMap struct { 10 | currs map[Currency][]float64 11 | lock sync.RWMutex 12 | } 13 | 14 | func NewCurrencyMap() CurrencyMap { 15 | return CurrencyMap{ 16 | currs: make(map[Currency][]float64), 17 | } 18 | } 19 | 20 | func (c *CurrencyMap) Append(currs Currencies) { 21 | c.lock.Lock() 22 | defer c.lock.Unlock() 23 | 24 | for curr, rate := range currs { 25 | c.currs[curr] = append(c.currs[curr], rate) 26 | } 27 | } 28 | 29 | func (c *CurrencyMap) Extract() Currencies { 30 | c.lock.RLock() 31 | defer c.lock.RUnlock() 32 | 33 | avg := make(Currencies) 34 | for curr, rates := range c.currs { 35 | var sum float64 36 | for _, rate := range rates { 37 | sum += rate 38 | } 39 | avg[curr] = sum / float64(len(rates)) 40 | } 41 | return avg 42 | } 43 | -------------------------------------------------------------------------------- /src/exchange/engines/currencyapi/exchange.go: -------------------------------------------------------------------------------- 1 | package currencyapi 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | 8 | "github.com/rs/zerolog/log" 9 | 10 | "github.com/hearchco/agent/src/exchange/currency" 11 | ) 12 | 13 | func (e Exchange) Exchange(base currency.Currency) (currency.Currencies, error) { 14 | // Get data from the API. 15 | api := e.apiUrlWithBaseCurrency(base) 16 | resp, err := http.Get(api) 17 | if err != nil { 18 | return nil, fmt.Errorf("failed to get data from %s: %w", api, err) 19 | } 20 | 21 | // Read the response body. 
22 | body, err := io.ReadAll(resp.Body) 23 | if err != nil { 24 | return nil, fmt.Errorf("failed to read response body: %w", err) 25 | } 26 | 27 | // Unmarshal the response. 28 | dataRates, err := e.extractRates(string(body), base) 29 | if err != nil { 30 | return nil, fmt.Errorf("failed to extract rates from response: %w", err) 31 | } 32 | 33 | // Check if no rates were found. 34 | if len(dataRates) == 0 { 35 | return nil, fmt.Errorf("no rates found for %s", base) 36 | } 37 | 38 | // Convert the rates to proper currency types with their rates. 39 | rates := make(currency.Currencies, len(dataRates)) 40 | for currS, rate := range dataRates { 41 | curr, err := currency.Convert(currS) 42 | if err != nil { 43 | // Non-ISO currencies are expected from this engine. 44 | log.Trace(). 45 | Err(err). 46 | Str("currency", currS). 47 | Msg("failed to convert currency") 48 | continue 49 | } 50 | rates[curr] = rate 51 | } 52 | 53 | // Set the base currency rate to 1. 54 | rates[base] = 1 55 | 56 | return rates, nil 57 | } 58 | -------------------------------------------------------------------------------- /src/exchange/engines/currencyapi/info.go: -------------------------------------------------------------------------------- 1 | package currencyapi 2 | 3 | const ( 4 | // Needs to have /.json at the end 5 | apiUrl = "https://cdn.jsdelivr.net/npm/@fawazahmed0/currency-api@2024-03-06/v1/currencies" 6 | ) 7 | -------------------------------------------------------------------------------- /src/exchange/engines/currencyapi/json.go: -------------------------------------------------------------------------------- 1 | package currencyapi 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "regexp" 7 | "strings" 8 | 9 | "github.com/hearchco/agent/src/exchange/currency" 10 | ) 11 | 12 | // Rates field is named the same as base currency. 
13 | func (e Exchange) extractRates(resp string, base currency.Currency) (map[string]float64, error) { 14 | pattern := `"` + base.Lower() + `":\s*{[^}]*}` 15 | regexp := regexp.MustCompile(pattern) 16 | match := regexp.FindString(resp) 17 | if match == "" { 18 | return nil, fmt.Errorf("could not find JSON field for base currency %s", base) 19 | } 20 | 21 | // Remove `"":`` from the match 22 | jsonRates := strings.TrimSpace((match[len(base.Lower())+3:])) 23 | 24 | var rates map[string]float64 25 | if err := json.Unmarshal([]byte(jsonRates), &rates); err != nil { 26 | return nil, fmt.Errorf("could not unmarshal JSON field for base currency %s: %w", base, err) 27 | } 28 | 29 | return rates, nil 30 | } 31 | -------------------------------------------------------------------------------- /src/exchange/engines/currencyapi/new.go: -------------------------------------------------------------------------------- 1 | package currencyapi 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/exchange/currency" 5 | ) 6 | 7 | type Exchange struct{} 8 | 9 | func New() Exchange { 10 | return Exchange{} 11 | } 12 | 13 | func (e Exchange) apiUrlWithBaseCurrency(base currency.Currency) string { 14 | return apiUrl + "/" + base.Lower() + ".json" 15 | } 16 | -------------------------------------------------------------------------------- /src/exchange/engines/currencyapi/note.md: -------------------------------------------------------------------------------- 1 | Includes a lot of currencies (and crypto) that aren's in ISO format so errors in logs are to be expected. 
// Exchanger is the interface an exchange engine provides.
type Exchanger interface {
	// Exchange returns rates keyed by currency for the given base currency, or
	// an error. The engines in this repository (currencyapi, exchangerateapi,
	// frankfurter) set the entry for base itself to 1.
	Exchange(base currency.Currency) (currency.Currencies, error)
}
47 | Msg("failed to convert currency") 48 | continue 49 | } 50 | rates[curr] = rate 51 | } 52 | 53 | // Set the base currency rate to 1. 54 | rates[base] = 1 55 | 56 | return rates, nil 57 | } 58 | -------------------------------------------------------------------------------- /src/exchange/engines/exchangerateapi/info.go: -------------------------------------------------------------------------------- 1 | package exchangerateapi 2 | 3 | const ( 4 | // Needs to have / at the end 5 | apiUrl = "https://open.er-api.com/v6/latest" 6 | ) 7 | -------------------------------------------------------------------------------- /src/exchange/engines/exchangerateapi/json.go: -------------------------------------------------------------------------------- 1 | package exchangerateapi 2 | 3 | type response struct { 4 | Rates map[string]float64 `json:"rates"` 5 | } 6 | -------------------------------------------------------------------------------- /src/exchange/engines/exchangerateapi/new.go: -------------------------------------------------------------------------------- 1 | package exchangerateapi 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/exchange/currency" 5 | ) 6 | 7 | type Exchange struct{} 8 | 9 | func New() Exchange { 10 | return Exchange{} 11 | } 12 | 13 | func (e Exchange) apiUrlWithBaseCurrency(base currency.Currency) string { 14 | return apiUrl + "/" + base.String() 15 | } 16 | -------------------------------------------------------------------------------- /src/exchange/engines/frankfurter/exchange.go: -------------------------------------------------------------------------------- 1 | package frankfurter 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | 9 | "github.com/rs/zerolog/log" 10 | 11 | "github.com/hearchco/agent/src/exchange/currency" 12 | ) 13 | 14 | func (e Exchange) Exchange(base currency.Currency) (currency.Currencies, error) { 15 | // Get data from the API. 
16 | api := e.apiUrlWithBaseCurrency(base) 17 | resp, err := http.Get(api) 18 | if err != nil { 19 | return nil, fmt.Errorf("failed to get data from %s: %w", api, err) 20 | } 21 | 22 | // Read the response body. 23 | body, err := io.ReadAll(resp.Body) 24 | if err != nil { 25 | return nil, fmt.Errorf("failed to read response body: %w", err) 26 | } 27 | 28 | // Unmarshal the response. 29 | var data response 30 | if err := json.Unmarshal(body, &data); err != nil { 31 | return nil, fmt.Errorf("failed to unmarshal response: %w", err) 32 | } 33 | 34 | // Check if no rates were found. 35 | if len(data.Rates) == 0 { 36 | return nil, fmt.Errorf("no rates found for %s", base) 37 | } 38 | 39 | // Convert the rates to proper currency types with their rates. 40 | rates := make(currency.Currencies, len(data.Rates)) 41 | for currS, rate := range data.Rates { 42 | curr, err := currency.Convert(currS) 43 | if err != nil { 44 | log.Error(). 45 | Err(err). 46 | Str("currency", currS). 47 | Msg("failed to convert currency") 48 | continue 49 | } 50 | rates[curr] = rate 51 | } 52 | 53 | // Set the base currency rate to 1. 54 | rates[base] = 1 55 | 56 | return rates, nil 57 | } 58 | -------------------------------------------------------------------------------- /src/exchange/engines/frankfurter/info.go: -------------------------------------------------------------------------------- 1 | package frankfurter 2 | 3 | const ( 4 | // Needs to have ?from= at the end 5 | apiUrl = "https://api.frankfurter.app/latest" 6 | ) 7 | -------------------------------------------------------------------------------- /src/exchange/engines/frankfurter/json.go: -------------------------------------------------------------------------------- 1 | package frankfurter 2 | 3 | // Rates doesn't include the base currency. 
4 | type response struct { 5 | Rates map[string]float64 `json:"rates"` 6 | } 7 | -------------------------------------------------------------------------------- /src/exchange/engines/frankfurter/new.go: -------------------------------------------------------------------------------- 1 | package frankfurter 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/exchange/currency" 5 | ) 6 | 7 | type Exchange struct{} 8 | 9 | func New() Exchange { 10 | return Exchange{} 11 | } 12 | 13 | func (e Exchange) apiUrlWithBaseCurrency(base currency.Currency) string { 14 | return apiUrl + "?from=" + base.String() 15 | } 16 | -------------------------------------------------------------------------------- /src/exchange/engines/name.go: -------------------------------------------------------------------------------- 1 | package engines 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | type Name int 8 | 9 | //go:generate enumer -type=Name -json -text -sql 10 | //go:generate go run github.com/hearchco/agent/generate/exchanger -type=Name -packagename exchange -output ../engine_exchanger.go 11 | const ( 12 | UNDEFINED Name = iota 13 | CURRENCYAPI 14 | EXCHANGERATEAPI 15 | FRANKFURTER 16 | ) 17 | 18 | // Returns engine names without UNDEFINED. 19 | func Names() []Name { 20 | return _NameValues[1:] 21 | } 22 | 23 | func (n Name) ToLower() string { 24 | return strings.ToLower(n.String()) 25 | } 26 | -------------------------------------------------------------------------------- /src/logger/setup.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "os" 5 | "time" 6 | 7 | "github.com/rs/zerolog" 8 | "github.com/rs/zerolog/log" 9 | ) 10 | 11 | func Setup(verbosity int8, pretty bool) zerolog.Logger { 12 | // Setup logger. 13 | var l zerolog.Logger 14 | if pretty { 15 | // This is much slower to print. 
16 | l = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.Stamp}) 17 | } else { 18 | l = zerolog.New(os.Stderr).With().Timestamp().Logger() 19 | } 20 | 21 | // Setup verbosity. 22 | switch { 23 | case verbosity > 1: // TRACE 24 | l = l.With().Caller().Logger().Level(zerolog.TraceLevel) 25 | case verbosity == 1: // DEBUG 26 | l = l.Level(zerolog.DebugLevel) 27 | default: // INFO 28 | l = l.Level(zerolog.InfoLevel) 29 | } 30 | 31 | // Set the logger to be global. 32 | log.Logger = l 33 | return l 34 | } 35 | -------------------------------------------------------------------------------- /src/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "os/signal" 7 | "syscall" 8 | _ "time/tzdata" 9 | 10 | "github.com/rs/zerolog/log" 11 | 12 | "github.com/hearchco/agent/src/cache" 13 | "github.com/hearchco/agent/src/cli" 14 | "github.com/hearchco/agent/src/config" 15 | "github.com/hearchco/agent/src/logger" 16 | "github.com/hearchco/agent/src/profiler" 17 | "github.com/hearchco/agent/src/router" 18 | ) 19 | 20 | var ( 21 | // Release variables. 22 | Version string 23 | Timestamp string 24 | GitCommit string 25 | ) 26 | 27 | func main() { 28 | // Setup signal interrupt (CTRL+C). 29 | ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM) 30 | defer cancel() 31 | 32 | // Parse cli flags. 33 | cliFlags, ver := cli.Setup(Version, Timestamp, GitCommit) 34 | 35 | // Configure logger. 36 | lgr := logger.Setup(cliFlags.Verbosity, cliFlags.Pretty) 37 | 38 | // Load config file. 39 | conf := config.New() 40 | conf.Load(cliFlags.ConfigPath) 41 | 42 | // Setup cache database. 43 | db, err := cache.New(ctx, conf.Server.Cache) 44 | if err != nil { 45 | log.Fatal(). 46 | Caller(). 47 | Err(err). 48 | Msg("Failed creating a new cache database") 49 | // ^FATAL 50 | } 51 | defer db.Close() 52 | 53 | // Start profiler if enabled. 
54 | _, stopProfiler := profiler.Run(cliFlags) 55 | defer stopProfiler() 56 | 57 | // Start router. 58 | rw := router.New(lgr, conf, db, cliFlags.ProfilerServe, ver) 59 | switch conf.Server.Environment { 60 | case "lambda": 61 | rw.StartLambda() 62 | default: 63 | rw.Start(ctx) 64 | } 65 | 66 | log.Info().Msg("Program finished") 67 | } 68 | -------------------------------------------------------------------------------- /src/profiler/run.go: -------------------------------------------------------------------------------- 1 | package profiler 2 | 3 | import ( 4 | "github.com/pkg/profile" 5 | "github.com/rs/zerolog/log" 6 | 7 | "github.com/hearchco/agent/src/cli" 8 | ) 9 | 10 | type profiler struct { 11 | enabled bool 12 | profile func(p *profile.Profile) 13 | } 14 | 15 | func Run(cliFlags cli.Flags) (bool, func()) { 16 | /* 17 | goroutine — stack traces of all current goroutines 18 | heap — a sampling of memory allocations of live objects 19 | allocs — a sampling of all past memory allocations 20 | threadcreate — stack traces that led to the creation of new OS threads 21 | block — stack traces that led to blocking on synchronization primitives 22 | mutex — stack traces of holders of contended mutexes 23 | */ 24 | 25 | profilers := [...]profiler{{ 26 | enabled: cliFlags.ProfilerCPU, 27 | profile: profile.CPUProfile, 28 | }, { 29 | enabled: cliFlags.ProfilerHeap, 30 | profile: profile.MemProfileHeap, 31 | }, { 32 | enabled: cliFlags.ProfilerGOR, 33 | profile: profile.GoroutineProfile, 34 | }, { 35 | enabled: cliFlags.ProfilerThread, 36 | profile: profile.ThreadcreationProfile, 37 | }, { 38 | enabled: cliFlags.ProfilerBlock, 39 | profile: profile.BlockProfile, 40 | }, { 41 | enabled: cliFlags.ProfilerAlloc, 42 | profile: profile.MemProfileAllocs, 43 | }, { 44 | enabled: cliFlags.ProfilerMutex, 45 | profile: profile.MutexProfile, 46 | }, { 47 | enabled: cliFlags.ProfilerClock, 48 | profile: profile.ClockProfile, 49 | }, { 50 | enabled: cliFlags.ProfilerTrace, 51 | 
profile: profile.TraceProfile, 52 | }} 53 | 54 | profilerToRun := profiler{enabled: false} 55 | for _, p := range profilers { 56 | if profilerToRun.enabled && p.enabled { 57 | log.Fatal(). 58 | Caller(). 59 | Msg("Only one profiler can be run at a time") 60 | // ^FATAL 61 | } else if p.enabled { 62 | profilerToRun = p 63 | } 64 | } 65 | if !profilerToRun.enabled { 66 | return false, func() {} 67 | } 68 | 69 | p := profile.Start(profilerToRun.profile, profile.ProfilePath("./profiling/"), profile.NoShutdownHook) 70 | return true, func() { 71 | p.Stop() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/router/lambda.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/aws/aws-lambda-go/events" 7 | "github.com/aws/aws-lambda-go/lambda" 8 | chiadapter "github.com/awslabs/aws-lambda-go-api-proxy/chi" 9 | ) 10 | 11 | func (rw RouterWrapper) StartLambda() { 12 | lambda.Start(func(ctx context.Context, req events.APIGatewayV2HTTPRequest) (events.APIGatewayV2HTTPResponse, error) { 13 | return chiadapter.NewV2(rw.mux).ProxyWithContextV2(ctx, req) 14 | }) 15 | } 16 | -------------------------------------------------------------------------------- /src/router/middlewares/compress.go: -------------------------------------------------------------------------------- 1 | package middlewares 2 | 3 | import ( 4 | "io" 5 | "net/http" 6 | 7 | "github.com/andybalholm/brotli" 8 | "github.com/go-chi/chi/v5/middleware" 9 | "github.com/klauspost/compress/zstd" 10 | "github.com/rs/zerolog/log" 11 | ) 12 | 13 | func compress(lvl int, types ...string) func(next http.Handler) http.Handler { 14 | // Already has deflate and gzip. 15 | comp := middleware.NewCompressor(lvl, types...) 16 | 17 | // Add brotli. 
18 | comp.SetEncoder("br", func(w io.Writer, lvl int) io.Writer { 19 | return brotli.NewWriterOptions(w, brotli.WriterOptions{ 20 | Quality: lvl, 21 | }) 22 | }) 23 | 24 | // Add zstd. 25 | comp.SetEncoder("zstd", func(w io.Writer, lvl int) io.Writer { 26 | writer, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zstd.EncoderLevel(lvl))) 27 | if err != nil { 28 | log.Panic().Err(err).Msg("Failed to create zstd writer") 29 | } 30 | return writer 31 | }) 32 | 33 | return comp.Handler 34 | } 35 | -------------------------------------------------------------------------------- /src/router/middlewares/logging.go: -------------------------------------------------------------------------------- 1 | package middlewares 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | "github.com/rs/zerolog" 8 | "github.com/rs/zerolog/hlog" 9 | ) 10 | 11 | func ignoredPath(p string, skipPaths []string) bool { 12 | for _, sp := range skipPaths { 13 | if sp == p { 14 | return true 15 | } 16 | } 17 | return false 18 | } 19 | 20 | func zerologMiddleware(lgr zerolog.Logger, skipPaths []string) [](func(http.Handler) http.Handler) { 21 | newHandler := hlog.NewHandler(lgr) 22 | fieldsHandler := hlog.AccessHandler(func(r *http.Request, status int, size int, duration time.Duration) { 23 | // Skip logging for ignored paths. 24 | if ignoredPath(r.URL.Path, skipPaths) { 25 | return 26 | } 27 | 28 | lgr := hlog.FromRequest(r) 29 | event := lgr.Info() 30 | if status >= 500 { 31 | event = lgr.Error() 32 | } else if status >= 400 { 33 | event = lgr.Warn() 34 | } 35 | 36 | event. 37 | Str("method", r.Method). 38 | Str("path", r.URL.Path). 39 | Int("status", status). 40 | Dur("duration", duration). 41 | Str("ip", r.RemoteAddr). 
42 | Msg("Request") 43 | }) 44 | 45 | return [](func(http.Handler) http.Handler){ 46 | newHandler, 47 | fieldsHandler, 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/router/middlewares/setup.go: -------------------------------------------------------------------------------- 1 | package middlewares 2 | 3 | import ( 4 | "net/http" 5 | "strings" 6 | 7 | "github.com/go-chi/chi/v5" 8 | "github.com/go-chi/chi/v5/middleware" 9 | "github.com/go-chi/cors" 10 | "github.com/rs/zerolog" 11 | "github.com/rs/zerolog/log" 12 | ) 13 | 14 | func Setup(mux *chi.Mux, lgr zerolog.Logger, frontendUrls []string, serveProfiler bool) { 15 | // Use custom zerolog middleware. 16 | skipPaths := []string{"/healthz", "/versionz"} 17 | mux.Use(zerologMiddleware(lgr, skipPaths)...) 18 | 19 | // Use recovery middleware. 20 | mux.Use(middleware.Recoverer) 21 | 22 | // Use compression middleware, except for image proxy since the response is copied over. 23 | mux.Use(middleware.Maybe(compress(3), func(r *http.Request) bool { 24 | return !strings.HasPrefix(r.URL.Path, "/proxy") 25 | })) 26 | 27 | // Use CORS middleware. 28 | mux.Use(cors.Handler(cors.Options{ 29 | AllowedOrigins: frontendUrls, 30 | AllowedMethods: []string{"GET", "POST", "OPTIONS"}, 31 | AllowedHeaders: []string{ 32 | "Accept", 33 | "Accept-Encoding", 34 | "Accept-Language", 35 | "Access-Control-Request-Headers", 36 | "Access-Control-Request-Method", 37 | "Origin", 38 | }, 39 | AllowCredentials: false, 40 | MaxAge: 300, 41 | })) 42 | 43 | log.Debug(). 44 | Strs("url", frontendUrls). 45 | Msg("Using CORS") 46 | 47 | // Use strip slashes middleware, except for pprof. 48 | mux.Use(middleware.Maybe(middleware.StripSlashes, func(r *http.Request) bool { 49 | return !strings.HasPrefix(r.URL.Path, "/debug") 50 | })) 51 | 52 | // Use pprof router if profiling is enabled. 
53 | if serveProfiler { 54 | mux.Mount("/debug", middleware.Profiler()) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/router/router.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "strconv" 7 | "time" 8 | 9 | "github.com/go-chi/chi/v5" 10 | "github.com/rs/zerolog" 11 | "github.com/rs/zerolog/log" 12 | 13 | "github.com/hearchco/agent/src/cache" 14 | "github.com/hearchco/agent/src/config" 15 | "github.com/hearchco/agent/src/router/middlewares" 16 | "github.com/hearchco/agent/src/router/routes" 17 | ) 18 | 19 | type RouterWrapper struct { 20 | mux *chi.Mux 21 | port int 22 | } 23 | 24 | func New(lgr zerolog.Logger, conf config.Config, db cache.DB, serveProfiler bool, version string) RouterWrapper { 25 | mux := chi.NewRouter() 26 | 27 | middlewares.Setup(mux, lgr, conf.Server.FrontendUrls, serveProfiler) 28 | routes.Setup(mux, version, db, conf) 29 | 30 | return RouterWrapper{mux: mux, port: conf.Server.Port} 31 | } 32 | 33 | func (rw RouterWrapper) Start(ctx context.Context) { 34 | // Create server. 35 | srv := http.Server{ 36 | Addr: ":" + strconv.Itoa(rw.port), 37 | Handler: rw.mux, 38 | } 39 | 40 | log.Info(). 41 | Int("port", rw.port). 42 | Msg("Starting server") 43 | 44 | // Shut down server gracefully on context cancellation. 45 | go func() { 46 | <-ctx.Done() 47 | log.Info().Msg("Shutting down server") 48 | 49 | // Create a context with timeout of 5 seconds. 50 | timeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) 51 | defer cancel() 52 | 53 | // Shutdown gracefully. 54 | // After the timeout is reached, server will be shut down forcefully. 55 | err := srv.Shutdown(timeout) 56 | if err != nil { 57 | log.Error(). 58 | Caller(). 59 | Err(err). 60 | Msg("Server shut down failed") 61 | } else { 62 | log.Info(). 
63 | Msg("Server shut down") 64 | } 65 | }() 66 | 67 | // Start server. 68 | err := srv.ListenAndServe() 69 | if err != nil && err != http.ErrServerClosed { 70 | log.Fatal(). 71 | Caller(). 72 | Err(err). 73 | Msg("Failed to start server") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/router/routes/params.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "net/url" 5 | ) 6 | 7 | func getParamOrDefault(params url.Values, key string, fallback ...string) string { 8 | val := params.Get(key) 9 | if val == "" && len(fallback) > 0 { 10 | return fallback[0] 11 | } 12 | return val 13 | } 14 | -------------------------------------------------------------------------------- /src/router/routes/responses.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/exchange/currency" 5 | "github.com/hearchco/agent/src/search/result" 6 | ) 7 | 8 | type ErrorResponse struct { 9 | Message string `json:"message"` 10 | Value string `json:"value"` 11 | } 12 | 13 | type responseBase struct { 14 | Version string `json:"version"` 15 | Duration int64 `json:"duration"` 16 | } 17 | 18 | type ResultsResponse struct { 19 | responseBase 20 | 21 | Results []result.ResultOutput `json:"results"` 22 | } 23 | 24 | type SuggestionsResponse struct { 25 | responseBase 26 | 27 | Suggestions []result.Suggestion `json:"suggestions"` 28 | } 29 | 30 | type ExchangeResponse struct { 31 | responseBase 32 | 33 | Base currency.Currency `json:"base"` 34 | From currency.Currency `json:"from"` 35 | To currency.Currency `json:"to"` 36 | Amount float64 `json:"amount"` 37 | Result float64 `json:"result"` 38 | } 39 | 40 | type CurrenciesResponse struct { 41 | responseBase 42 | 43 | Base currency.Currency `json:"base"` 44 | Currencies currency.Currencies `json:"currencies"` 45 | } 46 | 
-------------------------------------------------------------------------------- /src/router/routes/route_currencies.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "time" 7 | 8 | "github.com/hearchco/agent/src/cache" 9 | "github.com/hearchco/agent/src/config" 10 | "github.com/hearchco/agent/src/exchange" 11 | "github.com/rs/zerolog/log" 12 | ) 13 | 14 | func routeCurrencies(w http.ResponseWriter, ver string, conf config.Exchange, db cache.DB, ttl time.Duration) error { 15 | // Capture start time. 16 | startTime := time.Now() 17 | 18 | // Get the cached currencies. 19 | currencies, err := db.GetCurrencies(conf.BaseCurrency, conf.Engines) 20 | if err != nil { 21 | log.Error(). 22 | Err(err). 23 | Str("base", conf.BaseCurrency.String()). 24 | Str("engines", fmt.Sprintf("%v", conf.Engines)). 25 | Msg("Error while getting currencies from cache") 26 | } 27 | 28 | // Create the exchange. 29 | var exch exchange.Exchange 30 | if currencies == nil { 31 | // Fetch the currencies from the enabled engines. 32 | exch = exchange.NewExchange(conf) 33 | // Cache the currencies if any have been fetched. 34 | if len(exch.Currencies()) > 0 { 35 | err := db.SetCurrencies(conf.BaseCurrency, conf.Engines, exch.Currencies(), ttl) 36 | if err != nil { 37 | log.Error(). 38 | Err(err). 39 | Str("base", conf.BaseCurrency.String()). 40 | Str("engines", fmt.Sprintf("%v", conf.Engines)). 41 | Msg("Error while setting currencies in cache") 42 | } 43 | } 44 | } else { 45 | // Use the cached currencies. 
// writeResponse writes the status code followed by body as the raw response
// payload. It does not set a Content-Type itself.
// The returned error is the one reported by the underlying Write.
func writeResponse(w http.ResponseWriter, status int, body string) error {
	w.WriteHeader(status)
	_, err := w.Write([]byte(body))
	return err
}

// writeResponseJSON marshals body to JSON and writes it with the given status
// and the Content-Type "application/json".
// If marshalling fails, a 500 response is written instead and the marshalling
// error is returned.
func writeResponseJSON(w http.ResponseWriter, status int, body any) error {
	return writeMarshaled(w, status, "application/json", body)
}

// writeResponseSuggestions writes the two-element JSON array
// [query, suggestions] with the given status and the Content-Type
// "application/x-suggestions+json".
// If marshalling fails, a 500 response is written instead and the marshalling
// error is returned.
func writeResponseSuggestions(w http.ResponseWriter, status int, query string, suggestions []string) error {
	// A nil slice would be encoded as JSON null; always emit an array so that
	// consumers can rely on the second element being a list.
	if suggestions == nil {
		suggestions = []string{}
	}
	return writeMarshaled(w, status, "application/x-suggestions+json", [...]any{query, suggestions})
}

// writeMarshaled is the shared implementation of the JSON writers: it marshals
// body and writes it with the given status and Content-Type.
// On a marshalling error it responds with 500 "internal server error" and
// returns the marshalling error, wrapped together with the write error if
// writing that fallback body failed as well.
func writeMarshaled(w http.ResponseWriter, status int, contentType string, body any) error {
	res, err := json.Marshal(body)
	if err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		if _, werr := w.Write([]byte("internal server error")); werr != nil {
			return fmt.Errorf("%w: %w", werr, err)
		}
		return err
	}

	// Headers must be set before WriteHeader, which sends them.
	w.Header().Set("Content-Type", contentType)
	w.WriteHeader(status)
	_, err = w.Write(res)
	return err
}
w.Header().Set("Content-Encoding", ce) 54 | } 55 | 56 | if cl := resp.Header.Get("Content-Length"); cl != "" { 57 | w.Header().Set("Content-Length", cl) 58 | } 59 | 60 | if ct := resp.Header.Get("Content-Type"); ct != "" { 61 | w.Header().Set("Content-Type", ct) 62 | } 63 | 64 | w.WriteHeader(resp.StatusCode) 65 | _, err := io.Copy(w, resp.Body) 66 | return err 67 | } 68 | -------------------------------------------------------------------------------- /src/search/category/disabled.go: -------------------------------------------------------------------------------- 1 | package category 2 | 3 | import ( 4 | "slices" 5 | 6 | "github.com/hearchco/agent/src/search/engines" 7 | ) 8 | 9 | // Returns true if the category contains any disabled engines. 10 | // Otherwise, returns false. 11 | func (c Category) ContainsDisabledEngines(disabledEngines []engines.Name) bool { 12 | for _, eng := range disabledEngines { 13 | if slices.Contains(c.Engines, eng) { 14 | return true 15 | } 16 | } 17 | 18 | return false 19 | } 20 | -------------------------------------------------------------------------------- /src/search/category/type.go: -------------------------------------------------------------------------------- 1 | package category 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/hearchco/agent/src/search/engines" 7 | ) 8 | 9 | type Category struct { 10 | Engines []engines.Name 11 | RequiredEngines []engines.Name 12 | RequiredByOriginEngines []engines.Name 13 | PreferredEngines []engines.Name 14 | PreferredByOriginEngines []engines.Name 15 | Ranking Ranking 16 | Timings Timings 17 | } 18 | 19 | type Ranking struct { 20 | // The exponent, multiplier and addition used on the rank itself. 21 | RankExp float64 22 | RankMul float64 23 | RankAdd float64 24 | // The multiplier and addition used on the rank score (number calculated from dividing 100 with the rank + above variables applied). 
25 | RankScoreMul float64 26 | RankScoreAdd float64 27 | // The multiplier and addition used on the number of times the result was returned. 28 | TimesReturnedMul float64 29 | TimesReturnedAdd float64 30 | // The multiplier and addition used on the times returned score (number calculated from doing log(timesReturnedNum + above variables applied)). 31 | TimesReturnedScoreMul float64 32 | TimesReturnedScoreAdd float64 33 | // Multipliers and additions for each engine, applied to the rank score. 34 | Engines map[engines.Name]EngineRanking 35 | } 36 | 37 | type EngineRanking struct { 38 | Mul float64 39 | Add float64 40 | } 41 | 42 | type Timings struct { 43 | // Maximum amount of time to wait for the PreferredEngines (or ByOrigin) to respond. 44 | // If the search is still waiting for the RequiredEngines (or ByOrigin) after this time, the search will continue. 45 | PreferredTimeout time.Duration 46 | // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond). 47 | HardTimeout time.Duration 48 | } 49 | -------------------------------------------------------------------------------- /src/search/context_cancel.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/rs/zerolog/log" 10 | 11 | "github.com/hearchco/agent/src/search/engines" 12 | "github.com/hearchco/agent/src/utils/anonymize" 13 | ) 14 | 15 | // Hard timeout is associated with the required engines. 16 | func cancelHardTimeout(start time.Time, cancel context.CancelFunc, query string, wgEngs *sync.WaitGroup, engs []engines.Name, wgByOriginEngs *sync.WaitGroup, byOriginEngs []engines.Name) { 17 | groupNames := [...]string{groupRequired, groupRequiredByOrigin} 18 | cancelTimeout(groupNames, start, cancel, query, wgEngs, engs, wgByOriginEngs, byOriginEngs) 19 | } 20 | 21 | // Preferred timeout is associated with the preferred engines. 
22 | func cancelPreferredTimeout(start time.Time, cancel context.CancelFunc, query string, wgEngs *sync.WaitGroup, engs []engines.Name, wgByOriginEngs *sync.WaitGroup, byOriginEngs []engines.Name) { 23 | groupNames := [...]string{groupPreferred, groupPreferredByOrigin} 24 | cancelTimeout(groupNames, start, cancel, query, wgEngs, engs, wgByOriginEngs, byOriginEngs) 25 | } 26 | 27 | // Cancel timeout for the provided engines. 28 | func cancelTimeout(groupNames [2]string, start time.Time, cancel context.CancelFunc, query string, wgEngs *sync.WaitGroup, engs []engines.Name, wgByOriginEngs *sync.WaitGroup, byOriginEngs []engines.Name) { 29 | var wg sync.WaitGroup 30 | 31 | // Wait for all required engines to finish. 32 | wg.Add(1) 33 | go func() { 34 | defer wg.Done() 35 | wgEngs.Wait() 36 | log.Debug(). 37 | Str("query", anonymize.String(query)). 38 | Str("group", groupNames[0]). 39 | Str("engines", fmt.Sprintf("%v", engs)). 40 | Dur("duration", time.Since(start)). 41 | Msg("Scraping group finished") 42 | }() 43 | 44 | // Wait for all required by origin engines to finish. 45 | wg.Add(1) 46 | go func() { 47 | defer wg.Done() 48 | wgByOriginEngs.Wait() 49 | log.Debug(). 50 | Str("query", anonymize.String(query)). 51 | Str("group", groupNames[1]). 52 | Str("engines", fmt.Sprintf("%v", byOriginEngs)). 53 | Dur("duration", time.Since(start)). 
54 | Msg("Scraping group finished") 55 | }() 56 | 57 | wg.Wait() 58 | cancel() 59 | } 60 | -------------------------------------------------------------------------------- /src/search/engines/_engines_test/s_images.go: -------------------------------------------------------------------------------- 1 | package _engines_test 2 | 3 | import ( 4 | "context" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/hearchco/agent/src/search/result" 9 | "github.com/hearchco/agent/src/search/scraper" 10 | ) 11 | 12 | func CheckImageSearch(t *testing.T, e scraper.ImageSearcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) { 13 | // TestCaseHasAnyResults 14 | for _, tc := range tchar { 15 | e.InitSearcher(context.Background()) 16 | 17 | resChan := make(chan result.ResultScraped, 100) 18 | go e.ImageSearch(tc.Query, tc.Options, resChan) 19 | 20 | results := make([]result.ResultScraped, 0) 21 | for r := range resChan { 22 | results = append(results, r) 23 | } 24 | 25 | if len(results) == 0 { 26 | defer t.Errorf("Got no results for %q", tc.Query) 27 | } 28 | } 29 | 30 | // TestCaseContainsResults 31 | for _, tc := range tccr { 32 | e.InitSearcher(context.Background()) 33 | 34 | resChan := make(chan result.ResultScraped, 100) 35 | go e.ImageSearch(tc.Query, tc.Options, resChan) 36 | 37 | results := make([]result.ResultScraped, 0) 38 | for r := range resChan { 39 | results = append(results, r) 40 | } 41 | 42 | if len(results) == 0 { 43 | defer t.Errorf("Got no results for %q", tc.Query) 44 | } else { 45 | for _, rURL := range tc.ResultURLs { 46 | found := false 47 | 48 | for _, r := range results { 49 | if strings.Contains(r.URL(), rURL) { 50 | found = true 51 | break 52 | } 53 | } 54 | 55 | if !found { 56 | defer t.Errorf("Couldn't find %q (%q).\nThe results: %q", rURL, tc.Query, results) 57 | } 58 | } 59 | } 60 | } 61 | 62 | // TestCaseRankedResults 63 | for _, tc := range tcrr { 64 | e.InitSearcher(context.Background()) 65 | 66 | resChan 
:= make(chan result.ResultScraped, 100) 67 | go e.ImageSearch(tc.Query, tc.Options, resChan) 68 | 69 | results := make([]result.ResultScraped, 0) 70 | for r := range resChan { 71 | results = append(results, r) 72 | } 73 | 74 | if len(results) == 0 { 75 | defer t.Errorf("Got no results for %q", tc.Query) 76 | } else if len(results) < len(tc.ResultURLs) { 77 | defer t.Errorf("Number of results is less than test case URLs.") 78 | } else { 79 | for i, rURL := range tc.ResultURLs { 80 | if !strings.Contains(results[i].URL(), rURL) { 81 | defer t.Errorf("Wrong result on rank %q: %q (%q).\nThe results: %q", i+1, rURL, tc.Query, results) 82 | } 83 | } 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/search/engines/_engines_test/s_suggestions.go: -------------------------------------------------------------------------------- 1 | package _engines_test 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/result" 8 | "github.com/hearchco/agent/src/search/scraper" 9 | ) 10 | 11 | func CheckSuggest(t *testing.T, e scraper.Suggester, q string) { 12 | sugChan := make(chan result.SuggestionScraped) 13 | go func() { 14 | err, found := e.Suggest(q, NewOpts(), sugChan) 15 | if len(err) > 0 || !found { 16 | t.Errorf("Failed to get suggestions: %v", err) 17 | } 18 | }() 19 | 20 | suggs := make([]string, 0, 10) 21 | for sug := range sugChan { 22 | suggs = append(suggs, sug.Value()) 23 | } 24 | if len(suggs) == 0 { 25 | t.Errorf("No suggestions returned") 26 | } 27 | 28 | for _, s := range suggs { 29 | if s == "" { 30 | t.Errorf("Empty suggestion") 31 | } else if !strings.Contains(s, q) { 32 | t.Errorf("Suggestion doesn't contain query (%q): %q", q, s) 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/search/engines/_engines_test/s_web.go: -------------------------------------------------------------------------------- 1 | 
package _engines_test 2 | 3 | import ( 4 | "context" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/hearchco/agent/src/search/result" 9 | "github.com/hearchco/agent/src/search/scraper" 10 | ) 11 | 12 | func CheckWebSearch(t *testing.T, e scraper.WebSearcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) { 13 | // TestCaseHasAnyResults 14 | for _, tc := range tchar { 15 | e.InitSearcher(context.Background()) 16 | 17 | resChan := make(chan result.ResultScraped, 100) 18 | go e.WebSearch(tc.Query, tc.Options, resChan) 19 | 20 | results := make([]result.ResultScraped, 0) 21 | for r := range resChan { 22 | results = append(results, r) 23 | } 24 | 25 | if len(results) == 0 { 26 | defer t.Errorf("Got no results for %q", tc.Query) 27 | } 28 | } 29 | 30 | // TestCaseContainsResults 31 | for _, tc := range tccr { 32 | e.InitSearcher(context.Background()) 33 | 34 | resChan := make(chan result.ResultScraped, 100) 35 | go e.WebSearch(tc.Query, tc.Options, resChan) 36 | 37 | results := make([]result.ResultScraped, 0) 38 | for r := range resChan { 39 | results = append(results, r) 40 | } 41 | 42 | if len(results) == 0 { 43 | defer t.Errorf("Got no results for %q", tc.Query) 44 | } else { 45 | for _, rURL := range tc.ResultURLs { 46 | found := false 47 | 48 | for _, r := range results { 49 | if strings.Contains(r.URL(), rURL) { 50 | found = true 51 | break 52 | } 53 | } 54 | 55 | if !found { 56 | defer t.Errorf("Couldn't find %q (%q).\nThe results: %q", rURL, tc.Query, results) 57 | } 58 | } 59 | } 60 | } 61 | 62 | // TestCaseRankedResults 63 | for _, tc := range tcrr { 64 | e.InitSearcher(context.Background()) 65 | 66 | resChan := make(chan result.ResultScraped, 100) 67 | go e.WebSearch(tc.Query, tc.Options, resChan) 68 | 69 | results := make([]result.ResultScraped, 0) 70 | for r := range resChan { 71 | results = append(results, r) 72 | } 73 | 74 | if len(results) == 0 { 75 | defer t.Errorf("Got no results for %q", tc.Query) 76 | } 
else if len(results) < len(tc.ResultURLs) { 77 | defer t.Errorf("Number of results is less than test case URLs.") 78 | } else { 79 | for i, rURL := range tc.ResultURLs { 80 | if !strings.Contains(results[i].URL(), rURL) { 81 | defer t.Errorf("Wrong result on rank %q: %q (%q).\nThe results: %q", i+1, rURL, tc.Query, results) 82 | } 83 | } 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/search/engines/_engines_test/structs.go: -------------------------------------------------------------------------------- 1 | package _engines_test 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines/options" 5 | ) 6 | 7 | type TestCaseHasAnyResults struct { 8 | Query string 9 | Options options.Options 10 | } 11 | 12 | type TestCaseContainsResults struct { 13 | Query string 14 | ResultURLs []string 15 | Options options.Options 16 | } 17 | 18 | type TestCaseRankedResults struct { 19 | Query string 20 | ResultURLs []string 21 | Options options.Options 22 | } 23 | 24 | func NewOpts() options.Options { 25 | return options.Options{ 26 | Pages: options.Pages{Start: 0, Max: 1}, 27 | Locale: options.LocaleDefault, 28 | SafeSearch: false, 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/search/engines/bing/bing.md: -------------------------------------------------------------------------------- 1 | # Bing 2 | 3 | Getting the URL from dom.Find("div.tpcn div.tptxt cite") doesn't work since it may be truncated. 
4 | 5 | Telemetry example: 6 | https://www.bing.com/ck/a?!&&p=23fcb82b91411b05JmltdHM9MTY5MTEwNzIwMCZpZ3VpZD0xMTkyOTg3ZC03OWUyLTY1YTgtMWYzOC04YjFlNzg0NTY0NWYmaW5zaWQ9NTI3OQ&ptn=3&hsh=3&fclid=1192987d-79e2-65a8-1f38-8b1e7845645f&u=a1aHR0cHM6Ly93d3cuaW50ZXJuYXRpb25zLm9yZy9tYWdhemluZS90b3AtMTAtaG9iYmllcy15b3UtdmUtbmV2ZXItaGVhcmQtb2YtMzk3ODQ&ntb=1 7 | 8 | goes to: 9 | https://www.internations.org/magazine/top-10-hobbies-you-ve-never-heard-of-39784 10 | 11 | Description fetching could be improved for complicated results. 12 | 13 | `&setlang=en&cc=us` are the UI language and region parameters respectively. 14 | -------------------------------------------------------------------------------- /src/search/engines/bing/dompaths.go: -------------------------------------------------------------------------------- 1 | package bing 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | var dompaths = scraper.DOMPaths{ 8 | Result: "ol#b_results > li.b_algo", 9 | URL: "h2 > a", 10 | Title: "h2 > a", 11 | Description: "div.b_caption", 12 | } 13 | 14 | type thumbnailDomPaths struct { 15 | Path string 16 | Height string 17 | Width string 18 | } 19 | 20 | type metadataDomPaths struct { 21 | Path string 22 | Attr string 23 | } 24 | 25 | type bingImagesDomPaths struct { 26 | Result string 27 | Metadata metadataDomPaths 28 | Title string 29 | ImgFormatStr string 30 | Thumbnail [3]thumbnailDomPaths 31 | Source string 32 | } 33 | 34 | var imgDompaths = bingImagesDomPaths{ 35 | // aria-live is also a possible attribute for not() 36 | Result: "ul.dgControl_list > li[data-idx] > div.iuscp:not([vrhatt])", 37 | Metadata: metadataDomPaths{ 38 | Path: "a.iusc", 39 | Attr: "m", 40 | }, 41 | Title: "div.infnmpt > div > ul > li > a", 42 | ImgFormatStr: "div.imgpt > div > span", 43 | Thumbnail: [...]thumbnailDomPaths{ 44 | { 45 | Path: "a.iusc > div > img.mimg", 46 | Height: "height", 47 | Width: "width", 48 | }, 49 | { 50 | Path: "a.iusc > div > div > div.mimg > div", 51 | 
Height: "data-height", 52 | Width: "data-width", 53 | }, 54 | { 55 | Path: "a.iusc > div > div > div.mimg > img", 56 | Height: "height", 57 | Width: "width", 58 | }, 59 | }, 60 | Source: "div.imgpt > div.img_info > div.lnkw > a", 61 | } 62 | -------------------------------------------------------------------------------- /src/search/engines/bing/info.go: -------------------------------------------------------------------------------- 1 | package bing 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | const ( 8 | seName = engines.BING 9 | searchURL = "https://www.bing.com/search" 10 | imageSearchURL = "https://www.bing.com/images/async" 11 | ) 12 | 13 | var origins = [...]engines.Name{seName} 14 | -------------------------------------------------------------------------------- /src/search/engines/bing/json.go: -------------------------------------------------------------------------------- 1 | package bing 2 | 3 | type imgJsonMetadata struct { 4 | PageURL string `json:"purl"` 5 | ThumbnailURL string `json:"turl"` 6 | ImageURL string `json:"murl"` 7 | Desc string `json:"desc"` 8 | } 9 | -------------------------------------------------------------------------------- /src/search/engines/bing/new.go: -------------------------------------------------------------------------------- 1 | package bing 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | -------------------------------------------------------------------------------- /src/search/engines/bing/params.go: -------------------------------------------------------------------------------- 1 | package bing 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/hearchco/agent/src/search/engines/options" 8 | ) 9 | 10 | const ( 11 | // Variables params. 
12 | paramQueryK = "q" 13 | paramPageK = "first" 14 | paramLocaleK = "setlang" // Should be first 2 characters of Locale. 15 | paramLocaleSecK = "cc" // Should be last 2 characters of Locale. 16 | // paramSafeSearchK = "" // Always enabled. 17 | 18 | // Image variable params. 19 | imgCookieLocaleK = "m" 20 | imgCookieLocaleSecK = "u" 21 | imgCookieLocaleAltK = "mkt" 22 | imgCookieLocaleAltSecK = "ui" 23 | 24 | // Image constant params. 25 | imgParamAsyncK, imgParamAsyncV = "async", "1" 26 | imgParamCountK, imgParamCountV = "count", "35" 27 | ) 28 | 29 | func localeParamValues(locale options.Locale) (string, string) { 30 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2) 31 | return spl[0], spl[1] 32 | } 33 | 34 | func localeCookieString(locale options.Locale) string { 35 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2) 36 | return fmt.Sprintf("%v=%v&%v=%v", imgCookieLocaleK, spl[1], imgCookieLocaleSecK, spl[0]) 37 | } 38 | 39 | func localeAltCookieString(locale options.Locale) string { 40 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2) 41 | return fmt.Sprintf("%v=%v&%v=%v", imgCookieLocaleAltK, spl[1], imgCookieLocaleAltSecK, spl[0]) 42 | } 43 | -------------------------------------------------------------------------------- /src/search/engines/bing/s_images_test.go: -------------------------------------------------------------------------------- 1 | package bing 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestImageSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "wikipedia logo", 22 | ResultURLs: []string{"upload.wikimedia.org"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "linux logo wikipedia", 28 | ResultURLs: []string{"logos-world.net"}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckImageSearch(t, se, tchar[:], tccr[:], tcrr[:]) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/bing/s_web_test.go: -------------------------------------------------------------------------------- 1 | package bing 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
// removeTelemetry unwraps Bing click-tracking redirect links.
//
// URLs that do not start with "https://www.bing.com/ck/a?" are returned
// unchanged. For tracking URLs, the first value of the "u" query parameter is
// expected to be a two-character marker ("a1") followed by the unpadded,
// URL-safe base64 encoding of the destination; the decoded destination is
// returned.
//
// An error is returned when the URL cannot be parsed, when "u" is missing or
// too short to contain the marker, or when the payload is not valid base64.
func removeTelemetry(urll string) (string, error) {
	if !strings.HasPrefix(urll, "https://www.bing.com/ck/a?") {
		return urll, nil
	}

	parsedUrl, err := url.Parse(urll)
	if err != nil {
		return "", fmt.Errorf("failed parsing URL: %w", err)
	}

	// Length of the marker ("a1") that precedes the base64 payload.
	const markerLen = 2

	// Guard the slice below: a missing or truncated "u" value used to panic
	// with "slice bounds out of range".
	encodedUrl := parsedUrl.Query().Get("u")
	if len(encodedUrl) < markerLen {
		return "", fmt.Errorf("failed extracting target URL: missing or malformed \"u\" parameter %q", encodedUrl)
	}

	cleanUrl, err := base64.RawURLEncoding.DecodeString(encodedUrl[markerLen:])
	if err != nil {
		return "", fmt.Errorf("failed decoding base64: %w", err)
	}

	return string(cleanUrl), nil
}
10 | const ( 11 | // Variable params. 12 | paramQueryK = "q" 13 | paramPageK = "offset" 14 | cookieLocaleK = "country" // Should be last 2 characters of Locale. 15 | cookieSafeSearchK = "safesearch" // Can be "off" or "strict". 16 | 17 | // Constant params. 18 | paramSourceK, paramSourceV = "source", "web" 19 | paramSpellcheckK, paramSpellcheckV = "spellcheck", "0" 20 | ) 21 | 22 | func localeCookieString(locale options.Locale) string { 23 | region := strings.SplitN(strings.ToLower(locale.String()), "_", 2)[1] 24 | return fmt.Sprintf("%v=%v", cookieLocaleK, region) 25 | } 26 | 27 | func safeSearchCookieString(safesearch bool) string { 28 | if safesearch { 29 | return fmt.Sprintf("%v=%v", cookieSafeSearchK, "strict") 30 | } else { 31 | return fmt.Sprintf("%v=%v", cookieSafeSearchK, "off") 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/search/engines/brave/s_web_test.go: -------------------------------------------------------------------------------- 1 | package brave 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "facebook", 22 | ResultURLs: []string{"facebook.com"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "wikipedia", 28 | ResultURLs: []string{"wikipedia."}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/ddg.md: -------------------------------------------------------------------------------- 1 | # DuckDuckGo 2 | 3 | Send a [POST request](https://github.com/gocolly/colly/issues/175#issuecomment-400024313) to `https://lite.duckduckgo.com/lite/` with body: `q=&dc=`. It will return 20-22 results. GET requests could be used like `https://lite.duckduckgo.com/lite/?q=&dc=`. 4 | 5 | First request could be: col.PostRaw(Info.URL, []byte("q="+query+"&dc=1")) 6 | 7 | This may be useful: http://api.jquery.com/index/ 8 | 9 | The href on the title sometimes contains telemetry, and is not a valid URL then. That's why we fetch the scheme from it, and append it to the span text. 
10 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/dompaths.go: -------------------------------------------------------------------------------- 1 | package duckduckgo 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | var dompaths = scraper.DOMPaths{ 8 | ResultsContainer: "div.filters > table > tbody", 9 | URL: "td > a.result-link", 10 | Title: "td > a.result-link", 11 | Description: "td.result-snippet", 12 | } 13 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/info.go: -------------------------------------------------------------------------------- 1 | package duckduckgo 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | const ( 8 | seName = engines.DUCKDUCKGO 9 | searchURL = "https://lite.duckduckgo.com/lite/" 10 | suggestURL = "https://duckduckgo.com/ac/" 11 | ) 12 | 13 | var origins = [...]engines.Name{seName, engines.BING} 14 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/new.go: -------------------------------------------------------------------------------- 1 | package duckduckgo 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/params.go: -------------------------------------------------------------------------------- 1 | package duckduckgo 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/hearchco/agent/src/search/engines/options" 8 | ) 9 | 10 | const ( 11 | // Variable params. 
12 | paramQueryK = "q" 13 | paramPageK = "dc" 14 | cookieLocaleK = "kl" // Should be Locale with _ replaced by - and first 2 letters as last and vice versa. 15 | // paramSafeSearchK = "" // Always enabled. 16 | 17 | // Suggestions variable params. 18 | sugParamTypeK, sugParamTypeV = "type", "list" 19 | ) 20 | 21 | func localeCookieString(locale options.Locale) string { 22 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2) 23 | return fmt.Sprintf("%v=%v-%v", cookieLocaleK, spl[1], spl[0]) 24 | } 25 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/s_suggestions.go: -------------------------------------------------------------------------------- 1 | package duckduckgo 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/gocolly/colly/v2" 7 | "github.com/rs/zerolog/log" 8 | 9 | "github.com/hearchco/agent/src/search/engines/options" 10 | "github.com/hearchco/agent/src/search/result" 11 | "github.com/hearchco/agent/src/search/scraper" 12 | "github.com/hearchco/agent/src/utils/anonymize" 13 | "github.com/hearchco/agent/src/utils/moreurls" 14 | ) 15 | 16 | func (se Engine) Suggest(query string, options options.Options, sugChan chan result.SuggestionScraped) ([]error, bool) { 17 | foundResults := atomic.Bool{} 18 | retErrors := make([]error, 0, 1) 19 | 20 | se.OnResponse(func(e *colly.Response) { 21 | log.Trace(). 22 | Caller(). 23 | Bytes("body", e.Body). 24 | Msg("Got response") 25 | 26 | suggs, err := scraper.SuggestRespToSuggestions(e.Body) 27 | if err != nil { 28 | log.Error(). 29 | Caller(). 30 | Err(err). 31 | Bytes("body", e.Body). 32 | Msg("Failed to convert response to suggestions") 33 | } else { 34 | log.Trace(). 35 | Caller(). 36 | Str("engine", se.Name.String()). 37 | Strs("suggestions", suggs). 
38 | Msg("Sending suggestions to channel") 39 | for i, sug := range suggs { 40 | sugChan <- result.NewSuggestionScraped(sug, se.Name, i+1) 41 | } 42 | if !foundResults.Load() { 43 | foundResults.Store(true) 44 | } 45 | } 46 | }) 47 | 48 | ctx := colly.NewContext() 49 | 50 | // Build the parameters. 51 | params := moreurls.NewParams( 52 | paramQueryK, query, 53 | sugParamTypeK, sugParamTypeV, 54 | ) 55 | 56 | // Build the url. 57 | urll := moreurls.Build(suggestURL, params) 58 | 59 | // Build anonymous url, by anonymizing the query. 60 | params.Set(paramQueryK, anonymize.String(query)) 61 | anonUrll := moreurls.Build(suggestURL, params) 62 | 63 | // Send the request. 64 | if err := se.Get(ctx, urll, anonUrll); err != nil { 65 | retErrors = append(retErrors, err) 66 | } 67 | 68 | se.Wait() 69 | close(sugChan) 70 | return retErrors[:len(retErrors):len(retErrors)], foundResults.Load() 71 | } 72 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/s_suggestions_test.go: -------------------------------------------------------------------------------- 1 | package duckduckgo 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestSuggest(t *testing.T) { 11 | se := New() 12 | se.InitSuggester(context.Background()) 13 | _engines_test.CheckSuggest(t, se, "test") 14 | } 15 | -------------------------------------------------------------------------------- /src/search/engines/duckduckgo/s_web_test.go: -------------------------------------------------------------------------------- 1 | package duckduckgo 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "facebook", 22 | ResultURLs: []string{"facebook.com"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "wikipedia", 28 | ResultURLs: []string{"wikipedia."}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/etools/dompaths.go: -------------------------------------------------------------------------------- 1 | package etools 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | var dompaths = scraper.DOMPaths{ 8 | Result: "table.result > tbody > tr", 9 | URL: "td.record > a", 10 | Title: "td.record > a", 11 | Description: "td.record > div.text", 12 | } 13 | -------------------------------------------------------------------------------- /src/search/engines/etools/etools.md: -------------------------------------------------------------------------------- 1 | # Etools 2 | 3 | The first page request is a POST request that looks like: 4 | https://www.etools.ch/searchSubmit.do 5 | BODY: query=something&country=web&language=all&token=5d8d98d9a968388eeb4191afa00ca469 6 | Also works without token. 7 | 8 | The requests for subsequent pages are GET requests that look like: 9 | https://www.etools.ch/search.do?page=4 10 | With a session cookie you got from some previous request: 11 | JSESSIONID=147933E3060CF19256C3581D55E7A72A 12 | 13 | You can submit a GET request like: 14 | https://www.etools.ch/search.do?page=4&query=cool+cars 15 | But you need the JSESSIONID cookie for it to work 16 | 17 | It seems that, if performed too fast, the server can accidentally return the same response for different pages.
Thus, this package could benefit from some Timings. 18 | 19 | 20 | `?dataSourceResults=20` loads more requests 21 | 22 | Possible settings to apply: `https://www.etools.ch/searchSettings.do` 23 | Interesting are especially: `Results per search engine` and `Results per page` 24 | 25 | Captcha Example: 26 | ![Alt text](captcha.png) -------------------------------------------------------------------------------- /src/search/engines/etools/info.go: -------------------------------------------------------------------------------- 1 | package etools 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | const ( 8 | seName = engines.ETOOLS 9 | searchURL = "https://www.etools.ch/searchSubmit.do" 10 | pageURL = "https://www.etools.ch/search.do" 11 | ) 12 | 13 | var origins = [...]engines.Name{seName, engines.BING, engines.BRAVE, engines.DUCKDUCKGO, engines.GOOGLE, engines.MOJEEK, engines.QWANT, engines.YAHOO} 14 | -------------------------------------------------------------------------------- /src/search/engines/etools/new.go: -------------------------------------------------------------------------------- 1 | package etools 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | -------------------------------------------------------------------------------- /src/search/engines/etools/params.go: -------------------------------------------------------------------------------- 1 | package etools 2 | 3 | const ( 4 | // Variable params. 5 | paramQueryK = "query" 6 | paramPageK = "page" 7 | paramSafeSearchK = "safeSearch" // Can be "true" or "false". 8 | 9 | // Constant params. 
// safeSearchValue maps the safe-search flag to the string used as the value of
// the safe-search parameter: "true" when enabled, "false" when disabled.
func safeSearchValue(safesearch bool) string {
	if !safesearch {
		return "false"
	}
	return "true"
}
"github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | const ( 8 | seName = engines.GOOGLE 9 | searchURL = "https://www.google.com/search" 10 | imageSearchURL = "https://www.google.com/search" 11 | suggestURL = "https://suggestqueries.google.com/complete/search" 12 | ) 13 | 14 | var origins = [...]engines.Name{seName} 15 | -------------------------------------------------------------------------------- /src/search/engines/google/json.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | type imgJsonResponse struct { 4 | ISCHJ ischj `json:"ischj"` 5 | } 6 | 7 | type ischj struct { 8 | Metadata []metadata `json:"metadata"` 9 | } 10 | 11 | type metadata struct { 12 | Result jsonResult `json:"result"` 13 | TextInGrid textInGrid `json:"text_in_grid"` 14 | OriginalImage image `json:"original_image"` 15 | Thumbnail image `json:"thumbnail"` 16 | } 17 | 18 | type jsonResult struct { 19 | ReferrerUrl string `json:"referrer_url"` 20 | PageTitle string `json:"page_title"` 21 | SiteTitle string `json:"site_title"` 22 | } 23 | 24 | type textInGrid struct { 25 | Snippet string `json:"snippet"` 26 | } 27 | 28 | type image struct { 29 | Url string `json:"url"` 30 | Height int `json:"height"` 31 | Width int `json:"width"` 32 | } 33 | -------------------------------------------------------------------------------- /src/search/engines/google/new.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | -------------------------------------------------------------------------------- /src/search/engines/google/params.go: -------------------------------------------------------------------------------- 1 | package 
google 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/hearchco/agent/src/search/engines/options" 7 | ) 8 | 9 | const ( 10 | // Variable params. 11 | paramQueryK = "q" 12 | paramPageK = "start" 13 | paramLocaleK = "hl" // Should be first 2 characters of Locale. 14 | paramLocaleSecK = "lr" // Should be first 2 characters of Locale with prefixed "lang_". 15 | paramSafeSearchK = "safe" // Can be "off", "medium or "high". 16 | 17 | // Constant params. 18 | paramFilterK, paramFilterV = "filter", "0" 19 | 20 | // Image search variable params. 21 | imgParamPageK, imgParamPageVPrefix = "async", "_fmt:json,p:1,ijn:" 22 | 23 | // Image search constant params. 24 | imgParamTbmK, imgParamTbmV = "tbm", "isch" 25 | imgParamAsearchK, imgParamAsearchV = "asearch", "isch" 26 | 27 | // Suggestions constant params. 28 | sugParamClientK, sugParamClientV = "client", "firefox" 29 | ) 30 | 31 | func localeParamValues(locale options.Locale) (string, string) { 32 | lang := strings.SplitN(strings.ToLower(locale.String()), "_", 2)[0] 33 | return lang, "lang_" + lang 34 | } 35 | 36 | func safeSearchParamValue(safesearch bool) string { 37 | if safesearch { 38 | return "high" 39 | } else { 40 | return "off" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/search/engines/google/s_images_test.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestImageSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "wikipedia logo", 22 | ResultURLs: []string{"upload.wikimedia.org"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "linux logo wikipedia", 28 | ResultURLs: []string{"upload.wikimedia.org"}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckImageSearch(t, se, tchar[:], tccr[:], tcrr[:]) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/google/s_suggestions.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/gocolly/colly/v2" 7 | "github.com/rs/zerolog/log" 8 | 9 | "github.com/hearchco/agent/src/search/engines/options" 10 | "github.com/hearchco/agent/src/search/result" 11 | "github.com/hearchco/agent/src/search/scraper" 12 | "github.com/hearchco/agent/src/utils/anonymize" 13 | "github.com/hearchco/agent/src/utils/moreurls" 14 | ) 15 | 16 | func (se Engine) Suggest(query string, options options.Options, sugChan chan result.SuggestionScraped) ([]error, bool) { 17 | foundResults := atomic.Bool{} 18 | retErrors := make([]error, 0, 1) 19 | 20 | se.OnResponse(func(e *colly.Response) { 21 | log.Trace(). 22 | Caller(). 23 | Bytes("body", e.Body). 24 | Msg("Got response") 25 | 26 | suggs, err := scraper.SuggestRespToSuggestions(e.Body) 27 | if err != nil { 28 | log.Error(). 29 | Caller(). 30 | Err(err). 31 | Bytes("body", e.Body). 32 | Msg("Failed to convert response to suggestions") 33 | } else { 34 | log.Trace(). 35 | Caller(). 36 | Str("engine", se.Name.String()). 37 | Strs("suggestions", suggs). 
38 | Msg("Sending suggestions to channel") 39 | for i, sug := range suggs { 40 | sugChan <- result.NewSuggestionScraped(sug, se.Name, i+1) 41 | } 42 | if !foundResults.Load() { 43 | foundResults.Store(true) 44 | } 45 | } 46 | }) 47 | 48 | ctx := colly.NewContext() 49 | 50 | // Build the parameters. 51 | params := moreurls.NewParams( 52 | sugParamClientK, sugParamClientV, 53 | paramQueryK, query, 54 | ) 55 | 56 | // Build the url. 57 | urll := moreurls.Build(suggestURL, params) 58 | 59 | // Build anonymous url, by anonymizing the query. 60 | params.Set(paramQueryK, anonymize.String(query)) 61 | anonUrll := moreurls.Build(suggestURL, params) 62 | 63 | // Send the request. 64 | if err := se.Get(ctx, urll, anonUrll); err != nil { 65 | retErrors = append(retErrors, err) 66 | } 67 | 68 | se.Wait() 69 | close(sugChan) 70 | return retErrors[:len(retErrors):len(retErrors)], foundResults.Load() 71 | } 72 | -------------------------------------------------------------------------------- /src/search/engines/google/s_suggestions_test.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestSuggest(t *testing.T) { 11 | se := New() 12 | se.InitSuggester(context.Background()) 13 | _engines_test.CheckSuggest(t, se, "test") 14 | } 15 | -------------------------------------------------------------------------------- /src/search/engines/google/s_web_test.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "facebook", 22 | ResultURLs: []string{"facebook.com"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "wikipedia", 28 | ResultURLs: []string{"wikipedia."}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/googlescholar/dompaths.go: -------------------------------------------------------------------------------- 1 | package googlescholar 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | var dompaths = scraper.DOMPaths{ 8 | Result: "div#gs_res_ccl_mid > div.gs_or", 9 | URL: "h3 > a", 10 | Title: "h3 > a", 11 | Description: "div.gs_rs", 12 | } 13 | -------------------------------------------------------------------------------- /src/search/engines/googlescholar/info.go: -------------------------------------------------------------------------------- 1 | package googlescholar 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | const ( 8 | seName = engines.GOOGLESCHOLAR 9 | searchURL = "https://scholar.google.com/scholar" 10 | ) 11 | 12 | var origins = [...]engines.Name{seName} 13 | -------------------------------------------------------------------------------- /src/search/engines/googlescholar/new.go: -------------------------------------------------------------------------------- 1 | package googlescholar 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | 
-------------------------------------------------------------------------------- /src/search/engines/googlescholar/params.go: -------------------------------------------------------------------------------- 1 | package googlescholar 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/hearchco/agent/src/search/engines/options" 7 | ) 8 | 9 | const ( 10 | // Variables params. 11 | paramQueryK = "q" 12 | paramPageK = "start" 13 | paramLocaleK = "hl" // Should be first 2 characters of Locale. 14 | paramLocaleSecK = "lr" // Should be first 2 characters of Locale with prefixed "lang_". 15 | paramSafeSearchK = "safe" // Can be "off", "medium or "high". 16 | 17 | // Constant values. 18 | paramFilterK, paramFilterV = "filter", "0" 19 | ) 20 | 21 | func localeParamValues(locale options.Locale) (string, string) { 22 | lang := strings.SplitN(strings.ToLower(locale.String()), "_", 2)[0] 23 | return lang, "lang_" + lang 24 | } 25 | 26 | func safeSearchParamValue(safesearch bool) string { 27 | if safesearch { 28 | return "high" 29 | } else { 30 | return "off" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/search/engines/googlescholar/s_web_test.go: -------------------------------------------------------------------------------- 1 | package googlescholar 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "interaction nets", 22 | ResultURLs: []string{"https://dl.acm.org/doi/pdf/10.1145/96709.96718"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "On building fast kd-trees for ray tracing, and on doing that in O (N log N)", 28 | ResultURLs: []string{"https://ieeexplore.ieee.org/abstract/document/4061547/"}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/googlescholar/telemetry.go: -------------------------------------------------------------------------------- 1 | package googlescholar 2 | 3 | import ( 4 | "net/url" 5 | ) 6 | 7 | // Remove seemingly unused params in query. 
// removeTelemetry parses link, deletes the "dq", "lr", "oi", "ots" and "sig"
// query parameters and returns the resulting URL. If link cannot be parsed it
// is returned unchanged together with the parse error.
func removeTelemetry(link string) (string, error) {
	u, err := url.Parse(link)
	if err != nil {
		return link, err
	}

	query := u.Query()
	query.Del("dq")
	query.Del("lr")
	query.Del("oi")
	query.Del("ots")
	query.Del("sig")

	// Encode writes the remaining parameters sorted by key.
	u.RawQuery = query.Encode()

	return u.String(), nil
}
"strings" 5 | 6 | "github.com/hearchco/agent/src/search/engines/options" 7 | ) 8 | 9 | const ( 10 | // Variable params. 11 | paramQueryK = "q" 12 | paramPageK = "s" 13 | paramLocaleK = "lb" // Should be first 2 characters of Locale. 14 | paramLocaleSecK = "arc" // Should be last 2 characters of Locale. 15 | paramSafeSearchK = "safe" // Can be "0" or "1". 16 | ) 17 | 18 | func localeParamValues(locale options.Locale) (string, string) { 19 | spl := strings.SplitN(strings.ToLower(locale.String()), "_", 2) 20 | return spl[0], spl[1] 21 | } 22 | 23 | func safeSearchParamValue(safesearch bool) string { 24 | if safesearch { 25 | return "1" 26 | } else { 27 | return "0" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/search/engines/mojeek/s_web_test.go: -------------------------------------------------------------------------------- 1 | package mojeek 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
// Name is the integer identifier of a search engine. The zero value is
// UNDEFINED; each following constant names one engine.
//
// NOTE(review): the trailing comment on each constant (e.g.
// "enginer,websearcher") is presumably the capability list consumed by the
// enginer generator invoked by the generate directives below; confirm before
// editing those comments.
type Name int

//go:generate enumer -type=Name -json -text
//go:generate go run github.com/hearchco/agent/generate/enginer -type=Name -packagename search -output ../engine_enginer.go
const (
	UNDEFINED     Name = iota
	BING               // enginer,websearcher,imagesearcher
	BRAVE              // enginer,websearcher
	DUCKDUCKGO         // enginer,websearcher,suggester
	ETOOLS             // enginer,websearcher
	GOOGLE             // enginer,websearcher,imagesearcher,suggester
	GOOGLESCHOLAR      // enginer,websearcher
	MOJEEK             // enginer,websearcher
	PRESEARCH          // enginer,websearcher
	QWANT              // enginer,websearcher
	STARTPAGE          // enginer,websearcher
	SWISSCOWS          // enginer,websearcher
	YAHOO              // enginer,websearcher
	YEP                // disabled
)
// Locale is a language and region pair in the "en_US" format: two lowercase
// ASCII letters, an underscore, and two uppercase ASCII letters.
type Locale string

// LocaleDefault is the default locale, "en_US".
const LocaleDefault Locale = "en_US"

// String returns the locale as a plain string.
func (l Locale) String() string {
	return string(l)
}

// Validate reports whether l follows the "en_US" format.
//
// The checks run in a fixed order (empty, length, language letters, region
// letters, separator) and the error describes the first one that fails.
func (l Locale) Validate() error {
	isLower := func(c byte) bool { return c >= 'a' && c <= 'z' }
	isUpper := func(c byte) bool { return c >= 'A' && c <= 'Z' }

	switch {
	case len(l) == 0:
		return fmt.Errorf("invalid locale: empty")
	case len(l) != 5:
		return fmt.Errorf("invalid locale: isn't 5 characters long")
	case !isLower(l[0]) || !isLower(l[1]):
		return fmt.Errorf("invalid locale: first two characters must be lowercase ASCII letters")
	case !isUpper(l[3]) || !isUpper(l[4]):
		return fmt.Errorf("invalid locale: last two characters must be uppercase ASCII letters")
	case l[2] != '_':
		return fmt.Errorf("invalid locale: third character must be underscore")
	}

	return nil
}

// StringToLocale converts s to a Locale. It returns an empty Locale and the
// validation error when s is not a valid locale.
func StringToLocale(s string) (Locale, error) {
	candidate := Locale(s)

	err := candidate.Validate()
	if err != nil {
		return "", err
	}

	return candidate, nil
}
// Pages is the paging part of Options.
type Pages struct {
	Start int // Must be a 0-based index.
	Max   int // Must be greater than 0.
}
12 | ) 13 | 14 | func safeSearchCookieString(safesearch bool) string { 15 | if safesearch { 16 | return fmt.Sprintf("%v=%v", cookieSafeSearchK, "true") 17 | } else { 18 | return fmt.Sprintf("%v=%v", cookieSafeSearchK, "false") 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/search/engines/presearch/presearch.md: -------------------------------------------------------------------------------- 1 | # Presearch 2 | 3 | It's open source, but there doesn't seem to be any website code: https://github.com/PresearchOfficial 4 | 5 | GET request: https://presearch.com/search?q=something&page=3 6 | Gets populated with API call: GET https://presearch.com/results?id=5b747ca66cc051a82a6c5bbb784a7fa5f802 7 | 8 | There are cookies: 9 | + settings cookies: 10 | + ai_results_disable:1 11 | + use_safe_search:true 12 | + session cookies: 13 | + presearch_session: eyJpdiI6InBtNVgzZE5YZnUvcXRldGNrZytzTWc9PSIsInZh[...] 14 | + XSRF-TOKEN: eyJpdiI6InN5MlM1Z3ovdkJuQzNBcW5MM0x6RkE9PSIsInZhbHVlI[...] 15 | + weird cookies: 16 | + b: 0 17 | + AWSALB: N5A3Uv4njhnPnihhwOzEBPWXwUZCx/KyphsluMdnYHL[...] 18 | + AWSALBCORS: N5A3Uv4njhnPnihhwOzEBPWXwUZCx/KyphsluMdnY[...] 19 | 20 | The id to pass to results is the JS variable "window.searchId" that gets set on the initial GET request, it is generated server-side -------------------------------------------------------------------------------- /src/search/engines/presearch/s_web_test.go: -------------------------------------------------------------------------------- 1 | package presearch 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
// jsonResponse mirrors the part of the search response that gets decoded:
// the top-level "status" string and the "mainline" item groups found under
// "data" > "result" > "items".
type jsonResponse struct {
	Status string `json:"status"`
	Data   struct {
		Res struct {
			Items struct {
				Mainline []jsonMainlineItems `json:"mainline"`
			} `json:"items"`
		} `json:"result"`
	} `json:"data"`
}
"github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | -------------------------------------------------------------------------------- /src/search/engines/qwant/params.go: -------------------------------------------------------------------------------- 1 | package qwant 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/hearchco/agent/src/search/engines/options" 7 | "github.com/rs/zerolog/log" 8 | ) 9 | 10 | const ( 11 | // Variable params. 12 | paramQueryK = "q" 13 | paramPageK = "offset" 14 | paramLocaleK = "locale" // Same as Locale, only the last two characters are lowered and not everything is supported. 15 | paramSafeSearchK = "safesearch" // Can be "0" or "1". 16 | 17 | // Constant params. 18 | paramCountK, paramCountV = "count", "10" 19 | ) 20 | 21 | var validLocales = [...]string{"bg_bg", "br_fr", "ca_ad", "ca_es", "ca_fr", "co_fr", "cs_cz", "cy_gb", "da_dk", "de_at", "de_ch", "de_de", "ec_ca", "el_gr", "en_au", "en_ca", "en_gb", "en_ie", "en_my", "en_nz", "en_us", "es_ad", "es_ar", "es_cl", "es_co", "es_es", "es_mx", "es_pe", "et_ee", "eu_es", "eu_fr", "fc_ca", "fi_fi", "fr_ad", "fr_be", "fr_ca", "fr_ch", "fr_fr", "gd_gb", "he_il", "hu_hu", "it_ch", "it_it", "ko_kr", "nb_no", "nl_be", "nl_nl", "pl_pl", "pt_ad", "pt_pt", "ro_ro", "sv_se", "th_th", "zh_cn", "zh_hk"} 22 | 23 | func localeParamValue(locale options.Locale) string { 24 | l := strings.ToLower(locale.String()) 25 | for _, vl := range validLocales { 26 | if l == vl { 27 | return l 28 | } 29 | } 30 | 31 | log.Debug(). 32 | Caller(). 33 | Str("locale", locale.String()). 34 | Strs("validLocales", validLocales[:]). 
// safeSearchParamValue returns the value of the "safesearch" query param:
// "1" when safe search is enabled and "0" when it is disabled.
func safeSearchParamValue(safesearch bool) string {
	if safesearch {
		return "1"
	}

	// Disabled must map to "0": the param is documented as taking only "0"
	// or "1", so any other value does not mean "off".
	return "0"
}
4 | 5 | ``` 6 | q: 7 | count: 10 8 | locale: en_GB 9 | offset: 10 10 | device: desktop 11 | safesearch: 1 12 | ``` 13 | 14 | To parse the incoming JSON we use https://pkg.go.dev/encoding/json#Unmarshal ([help](https://www.sohamkamani.com/golang/json/)). Especially note: 15 | 16 | > By default, object keys which don't have a corresponding struct field are ignored (see Decoder.DisallowUnknownFields for an alternative). 17 | 18 | We pass data to the colly callbacks like this: 19 | 20 | ``` 21 | colCtx := colly.NewContext() 22 | colCtx.Put("offset", strconv.Itoa(i*qResCount)) 23 | col.Request("GET", Info.URL, nil, colCtx, nil) 24 | ``` 25 | 26 | ^ Instead of colly.Visit(Info.URL) 27 | 28 | For the first result page `col.Visit(Info.URL + query + "&t=web&locale=" + qLocale + "&s=" + qSafeSearch)` could be used. This would emulate an actual user better. Its `.OnHTML` is implemented, but it seems to not play well with the API calls: some results overlap, which doesn't make any sense. If this is used for the first page, then `for i := 0; i < opts.Pages.Max; i++ {` needs to start at 1 (i.e. `for i := 1; ...`). When it works and when it doesn't seems random, so it may be best not to touch it. Last query on which it didn't work: `./main --query="jako cudne stvari" --max-pages=2 -vv --visit` 29 | -------------------------------------------------------------------------------- /src/search/engines/qwant/s_web_test.go: -------------------------------------------------------------------------------- 1 | package qwant 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "facebook", 22 | ResultURLs: []string{"facebook.com"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "wikipedia", 28 | ResultURLs: []string{"wikipedia."}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/startpage/dompaths.go: -------------------------------------------------------------------------------- 1 | package startpage 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | var dompaths = scraper.DOMPaths{ 8 | Result: "div.w-gl > div.result", 9 | URL: "a.result-title", 10 | Title: "a.result-title", 11 | Description: "p.description", 12 | } 13 | -------------------------------------------------------------------------------- /src/search/engines/startpage/info.go: -------------------------------------------------------------------------------- 1 | package startpage 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | const ( 8 | seName = engines.STARTPAGE 9 | searchURL = "https://www.startpage.com/sp/search" 10 | ) 11 | 12 | var origins = [...]engines.Name{seName, engines.GOOGLE} 13 | -------------------------------------------------------------------------------- /src/search/engines/startpage/new.go: -------------------------------------------------------------------------------- 1 | package startpage 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | 
-------------------------------------------------------------------------------- /src/search/engines/startpage/params.go: -------------------------------------------------------------------------------- 1 | package startpage 2 | 3 | const ( 4 | // Variable params. 5 | paramQueryK = "q" 6 | paramPageK = "page" 7 | 8 | // Constant params. 9 | paramSafeSearchK, paramSafeSearchV = "qadf", "none" // Can be "none" or empty param (empty means it's enabled). 10 | ) 11 | -------------------------------------------------------------------------------- /src/search/engines/startpage/s_web_test.go: -------------------------------------------------------------------------------- 1 | package startpage 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "facebook", 22 | ResultURLs: []string{"facebook.com"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "wikipedia", 28 | ResultURLs: []string{"wikipedia."}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/startpage/startpage.md: -------------------------------------------------------------------------------- 1 | # Startpage 2 | 3 | First search: POST request to https://www.startpage.com/sp/search 4 | with body: abp=-1&additional=%5Bobject+Object%5D&cat=web&language=english&lui=english&query=some+query&sc=BSuId774jcrp20&sgt=1691175704T0afc510362af195aa4ac76bde15e32e85914a4901124669719eaac0e2c326f15&t= 5 
| 6 | Sending just cat,language,lui,query gets this: 7 | ![Alt text](image.png) 8 | 9 | Resending the previous request gets this: 10 | ![Alt text](image-1.png) 11 | 12 | Request to second page: POST request to https://www.startpage.com/sp/search 13 | with body: language=english&lui=english&abp=-1&query=some+query&cat=web&page=2&sc=HLlIFdefdQOM20 14 | 15 | Resending it worked fine. 16 | 17 | Changing HLlIFdefdQOM20 to HLlIFdefdZOM20 and resending worked fine. Changing it to aaaaaaaaaaaaaa redirects to an error page that sends the JavaScript message. The sc value is plainly set in the html (form#search > input[name="sc"]). When the last page is hit: 18 | ![Alt text](image-2.png) 19 | 20 | Doesn't use cookies. 21 | 22 | + Safe search is on: add qadf=heavy to POST body 23 | + Safe search is off: add qadf=none to POST body 24 | - Not sure if it needs to be set with every request 25 | 26 | Disabling JavaScript in browser settings gets the **Error 883** page. However, sending requests through GET: https://www.startpage.com/sp/search?q= works even if JavaScript is disabled. The GET request works with no cookies / body. For the page, the `page` URL parameter is used. E.g. 
https://www.startpage.com/sp/search?q=i+dont+get+it&page=3 27 | 28 | 29 | # Locale 30 | The locale is set with the POST body `qloc` variable and looks something like this: 31 | `JTdCJTIyY2MlMjIlM0ElMjJVUyUyMiUyQyUyMmxvY2F0aW9uJTIyJTNBJTIyVW5pdGVkJTIwU3RhdGVzJTIyJTJDJTIyc3RhdGVfY29kZSUyMiUzQSUyMjAwJTIyJTJDJTIydHlwZSUyMiUzQSUyMmN1c3RvbV9sb2NhdGlvbiUyMiU3RA%3D%3D`\ 32 | If we replace `%3D`s with `=`s we can base64 decode it into: 33 | `%7B%22cc%22%3A%22US%22%2C%22location%22%3A%22United%20States%22%2C%22state_code%22%3A%2200%22%2C%22type%22%3A%22custom_location%22%7D`\ 34 | Which we can url decode into: 35 | `{"cc":"US","location":"United States","state_code":"00","type":"custom_location"}` 36 | Another decoded example is 37 | `{"cc":"CN","location":"People’s Republic of China","state_code":"00","type":"custom_location"}` 38 | It seems for states, the `state_code` is always `00` and the `type` is always `custom_location`. The `location` parameter may be irrelevant, and spoofing `cc` could be sufficient. However, the results don't seem to change when the region is changed, so it's impossible to test. 39 | -------------------------------------------------------------------------------- /src/search/engines/swisscows/authenticator.go: -------------------------------------------------------------------------------- 1 | package swisscows 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "strings" 7 | "time" 8 | "unicode" 9 | 10 | "github.com/hearchco/agent/src/utils/anonymize" 11 | ) 12 | 13 | const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" 14 | 15 | // Returns nonce and signature. 
16 | func generateAuth(params string) (string, string, error) { 17 | paramsWOP := strings.ReplaceAll(params, "+", " ") 18 | nonce := generateNonce(32) 19 | 20 | auth, err := generateSignature(paramsWOP, nonce) 21 | if err != nil { 22 | return "", "", fmt.Errorf("failed to generate auth (nonce and signature): %w", err) 23 | } 24 | 25 | return nonce, auth, nil 26 | } 27 | 28 | func generateNonce(length int) string { 29 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 30 | 31 | nonce := "" 32 | for range length { 33 | randInd := r.Intn(length) 34 | nonce += string(alphabet[randInd]) 35 | } 36 | 37 | return nonce 38 | } 39 | 40 | func generateSignature(params string, nonce string) (string, error) { 41 | rot13Nonce := rot13Switch(nonce) 42 | data := "/web/search" + params + rot13Nonce 43 | encData := anonymize.CalculateHashBase64(data) 44 | encData = strings.ReplaceAll(encData, "=", "") 45 | encData = strings.ReplaceAll(encData, "+", "-") 46 | encData = strings.ReplaceAll(encData, "/", "_") 47 | 48 | return encData, nil 49 | } 50 | 51 | func rot13Switch(str string) string { 52 | return switchCapitalization(rot13(str)) 53 | } 54 | 55 | // Performs rot13 and switches capitalization of each character. 
// switchCapitalization returns str with the case of each byte flipped:
// uppercase letters are lowered and everything else is passed through
// unicode.ToUpper, which leaves characters without an uppercase form as is.
// The input is processed byte by byte, not rune by rune.
func switchCapitalization(str string) string {
	var flipped strings.Builder

	for _, b := range []byte(str) {
		r := rune(b)
		if unicode.IsUpper(r) {
			r = unicode.ToLower(r)
		} else {
			r = unicode.ToUpper(r)
		}
		flipped.WriteRune(r)
	}

	return flipped.String()
}
"github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | -------------------------------------------------------------------------------- /src/search/engines/swisscows/params.go: -------------------------------------------------------------------------------- 1 | package swisscows 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/hearchco/agent/src/search/engines/options" 7 | ) 8 | 9 | const ( 10 | // Variable params. 11 | paramQueryK = "query" 12 | paramPageK = "offset" 13 | paramLocaleK = "region" // Should be the same as Locale, only with "_" replaced by "-". 14 | 15 | // Constant params. 16 | paramFreshnessK, paramFreshnessV = "freshness", "All" 17 | paramItemsK, paramItemsV = "itemsCount", "10" 18 | ) 19 | 20 | func localeParamValue(locale options.Locale) string { 21 | return strings.Replace(locale.String(), "_", "-", 1) 22 | } 23 | -------------------------------------------------------------------------------- /src/search/engines/swisscows/s_web_test.go: -------------------------------------------------------------------------------- 1 | package swisscows 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
15 | tchar := []_engines_test.TestCaseHasAnyResults{{ 16 | Query: "ping", 17 | Options: opt, 18 | }} 19 | 20 | tccr := []_engines_test.TestCaseContainsResults{{ 21 | Query: "facebook", 22 | ResultURLs: []string{"facebook.com"}, 23 | Options: opt, 24 | }} 25 | 26 | tcrr := []_engines_test.TestCaseRankedResults{{ 27 | Query: "wikipedia", 28 | ResultURLs: []string{"wikipedia."}, 29 | Options: opt, 30 | }} 31 | 32 | se := New() 33 | se.InitSearcher(context.Background()) 34 | 35 | _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 36 | } 37 | -------------------------------------------------------------------------------- /src/search/engines/yahoo/dompaths.go: -------------------------------------------------------------------------------- 1 | package yahoo 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | var dompaths = scraper.DOMPaths{ 8 | Result: "div#main > div > div#web > ol > li > div.algo", 9 | URL: "h3.title > a", 10 | Title: "h3.title > a", 11 | Description: "div > div.compText > p > span", 12 | } 13 | -------------------------------------------------------------------------------- /src/search/engines/yahoo/info.go: -------------------------------------------------------------------------------- 1 | package yahoo 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | const ( 8 | seName = engines.YAHOO 9 | searchURL = "https://search.yahoo.com/search" 10 | ) 11 | 12 | var origins = [...]engines.Name{seName, engines.BING} 13 | -------------------------------------------------------------------------------- /src/search/engines/yahoo/new.go: -------------------------------------------------------------------------------- 1 | package yahoo 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/scraper" 5 | ) 6 | 7 | type Engine struct { 8 | scraper.EngineBase 9 | } 10 | 11 | func New() *Engine { 12 | return &Engine{scraper.EngineBase{ 13 | Name: seName, 14 | Origins: origins[:], 15 | }} 16 | } 17 | 
-------------------------------------------------------------------------------- /src/search/engines/yahoo/params.go: -------------------------------------------------------------------------------- 1 | package yahoo 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | const ( 8 | // Variable params. 9 | paramQueryK = "p" 10 | paramPageK = "b" 11 | cookieSafeSearchK = "vm" // Can be "p" (disabled) or "r" (enabled). 12 | 13 | // Constant params. 14 | cookieSafeSearchPrefix = "sB=v=1&pn=10&rw=new&userset=0" 15 | // paramSbK, paramSbV = "sB", "v=1" 16 | // paramPnK, paramPnV = "pn", "10" 17 | // paramRwK, paramRwV = "rw", "new" 18 | // paramUsersetK, paramUsersetV = "userset", "0" 19 | ) 20 | 21 | func safeSearchCookieString(safesearch bool) string { 22 | if safesearch { 23 | return fmt.Sprintf("%v&%v=%v", cookieSafeSearchPrefix, cookieSafeSearchK, "r") 24 | } else { 25 | return fmt.Sprintf("%v&%v=%v", cookieSafeSearchPrefix, cookieSafeSearchK, "p") 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/search/engines/yahoo/s_web_test.go: -------------------------------------------------------------------------------- 1 | package yahoo 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hearchco/agent/src/search/engines/_engines_test" 8 | ) 9 | 10 | func TestWebSearch(t *testing.T) { 11 | // Testing options. 12 | opt := _engines_test.NewOpts() 13 | 14 | // Test cases. 
// removeTelemetry unwraps a redirect link and returns the URL it points to.
//
// Links that do not go through r.search.yahoo.com are returned unchanged.
// For redirect links the target is the escaped text that starts with "http"
// right after "/RU=" and ends before "/RK="; it is unescaped and returned.
// A redirect link without an "/RU=http" segment has nothing to unwrap and is
// returned unchanged rather than causing an out-of-range panic. An error is
// returned only when the target cannot be unescaped.
func removeTelemetry(urll string) (string, error) {
	if !strings.Contains(urll, "://r.search.yahoo.com/") {
		return urll, nil
	}

	_, rest, found := strings.Cut(urll, "/RU=http")
	if !found {
		return urll, nil
	}
	// Everything from "/RK=" on belongs to the redirect, not the target.
	target, _, _ := strings.Cut(rest, "/RK=")

	newLink, err := url.QueryUnescape("http" + target)
	if err != nil {
		return "", err
	}

	return newLink, nil
}
// type jsonResult struct { 8 | // URL string `json:"url"` 9 | // Title string `json:"title"` 10 | // TType string `json:"type"` 11 | // Snippet string `json:"snippet"` 12 | // } 13 | -------------------------------------------------------------------------------- /src/search/engines/yep/new.go: -------------------------------------------------------------------------------- 1 | package yep 2 | 3 | // import ( 4 | // "github.com/hearchco/agent/src/search/scraper" 5 | // ) 6 | 7 | // type Engine struct { 8 | // scraper.EngineBase 9 | // } 10 | 11 | // func New() *Engine { 12 | // return &Engine{scraper.EngineBase{ 13 | // Name: seName, 14 | // Origins: origins[:], 15 | // }} 16 | // } 17 | -------------------------------------------------------------------------------- /src/search/engines/yep/params.go: -------------------------------------------------------------------------------- 1 | package yep 2 | 3 | // import ( 4 | // "fmt" 5 | // "strings" 6 | 7 | // "github.com/hearchco/agent/src/search/engines/options" 8 | // ) 9 | 10 | // const ( 11 | // paramKeyPage = "limit" 12 | // paramKeyLocale = "gl" // Should be last 2 characters of Locale. 13 | // paramKeySafeSearch = "safeSearch" // Can be "off" or "strict". 
14 | 15 | // paramClient = "client=web" 16 | // paramNo_correct = "no_correct=false" 17 | // paramType = "type=web" 18 | // ) 19 | 20 | // func localeParamString(locale options.Locale) string { 21 | // country := strings.Split(locale.String(), "_")[1] 22 | // return fmt.Sprintf("%v=%v", paramKeyLocale, country) 23 | // } 24 | 25 | // func safeSearchParamString(safesearch bool) string { 26 | // if safesearch { 27 | // return fmt.Sprintf("%v=%v", paramKeySafeSearch, "strict") 28 | // } else { 29 | // return fmt.Sprintf("%v=%v", paramKeySafeSearch, "off") 30 | // } 31 | // } 32 | -------------------------------------------------------------------------------- /src/search/engines/yep/s_web_test.go: -------------------------------------------------------------------------------- 1 | package yep 2 | 3 | // import ( 4 | // "context" 5 | // "testing" 6 | 7 | // "github.com/hearchco/agent/src/search/category" 8 | // "github.com/hearchco/agent/src/search/engines/_engines_test" 9 | // ) 10 | 11 | // func TestWebSearch(t *testing.T) { 12 | // // Testing options. 13 | // conf := _engines_test.NewConfig(seName) 14 | // opt := _engines_test.NewOpts() 15 | 16 | // // Test cases. 
17 | // tchar := []_engines_test.TestCaseHasAnyResults{{ 18 | // Query: "ping", 19 | // Options: opt, 20 | // }} 21 | 22 | // tccr := []_engines_test.TestCaseContainsResults{{ 23 | // Query: "youtube", 24 | // ResultURLs: []string{"youtube.com"}, 25 | // Options: opt, 26 | // }} 27 | 28 | // tcrr := []_engines_test.TestCaseRankedResults{{ 29 | // Query: "wikipedia", 30 | // ResultURLs: []string{"wikipedia."}, 31 | // Options: opt, 32 | // }} 33 | 34 | // se := New() 35 | // se.Init(context.Background()) 36 | 37 | // _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) 38 | // } 39 | -------------------------------------------------------------------------------- /src/search/groups.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | const groupRequired = "required" 4 | const groupRequiredByOrigin = "required_by_origin" 5 | const groupPreferred = "preferred" 6 | const groupPreferredByOrigin = "preferred_by_origin" 7 | -------------------------------------------------------------------------------- /src/search/init.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/hearchco/agent/src/search/engines" 7 | "github.com/hearchco/agent/src/search/scraper" 8 | ) 9 | 10 | // initializeWebSearchers calls InitSearcher(ctx) on the searcher of every engine in engs and returns all entries of webSearcherArray() as a slice, including engines that were not in engs. 11 | func initializeWebSearchers(ctx context.Context, engs []engines.Name) []scraper.WebSearcher { 12 | searchers := webSearcherArray() 13 | for _, engName := range engs { 14 | searchers[engName].InitSearcher(ctx) 15 | } 16 | return searchers[:] 17 | } 18 | 19 | // initializeImageSearchers calls InitSearcher(ctx) on the searcher of every engine in engs and returns all entries of imageSearcherArray() as a slice, including engines that were not in engs. 20 | func initializeImageSearchers(ctx context.Context, engs []engines.Name) []scraper.ImageSearcher { 21 | searchers := imageSearcherArray() 22 | for _, engName := range engs { 23 | searchers[engName].InitSearcher(ctx) 24 | } 25 | return searchers[:] 26 | } 27 | 28 | // Initialize suggesters.
29 | func initializeSuggesters(ctx context.Context, engs []engines.Name) []scraper.Suggester { 30 | suggesters := suggesterArray() 31 | for _, engName := range engs { 32 | suggesters[engName].InitSuggester(ctx) 33 | } 34 | return suggesters[:] 35 | } 36 | -------------------------------------------------------------------------------- /src/search/once.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | 7 | "github.com/hearchco/agent/src/search/engines" 8 | ) 9 | /* onceWrapper pairs a sync.Once with two atomic flags that record whether a run errored and whether it scraped. */ 10 | type onceWrapper struct { 11 | once *sync.Once 12 | errored atomic.Bool 13 | scraped atomic.Bool 14 | } 15 | /* initOnceWrapper returns a map holding a fresh onceWrapper (both flags false) for every engine name in engs. */ 16 | func initOnceWrapper(engs []engines.Name) map[engines.Name]*onceWrapper { 17 | onceWrapMap := make(map[engines.Name]*onceWrapper, len(engs)) 18 | for _, eng := range engs { 19 | onceWrapMap[eng] = &onceWrapper{ 20 | once: &sync.Once{}, 21 | errored: atomic.Bool{}, 22 | scraped: atomic.Bool{}, 23 | } 24 | } 25 | return onceWrapMap 26 | } 27 | /* Do forwards f to the wrapped sync.Once, so f runs at most once per wrapper. */ 28 | func (ow *onceWrapper) Do(f func()) { 29 | ow.once.Do(f) 30 | } 31 | /* Errored sets the errored flag to true; it is a setter and returns nothing. */ 32 | func (ow *onceWrapper) Errored() { 33 | if !ow.errored.Load() { 34 | ow.errored.Store(true) 35 | } 36 | } 37 | /* Scraped sets the scraped flag to true; it is a setter and returns nothing. */ 38 | func (ow *onceWrapper) Scraped() { 39 | if !ow.scraped.Load() { 40 | ow.scraped.Store(true) 41 | } 42 | } 43 | /* Success reports whether the scraped flag is set and the errored flag is not. */ 44 | func (ow *onceWrapper) Success() bool { 45 | return !ow.errored.Load() && ow.scraped.Load() 46 | } 47 | -------------------------------------------------------------------------------- /src/search/params.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/hearchco/agent/src/search/engines/options" 7 | ) 8 | 9 | func validateParams(query string, opts options.Options) error { 10 | if query == "" { 11 | return fmt.Errorf("query can't be empty") 12 | } 13 | if opts.Locale == "" { 14 | return fmt.Errorf("locale can't be empty") 15 | } 16 | if
opts.Pages.Start < 0 { 17 | return fmt.Errorf("pages start can't be negative") 18 | } 19 | if opts.Pages.Max < 1 { 20 | return fmt.Errorf("pages max can't be less than 1") 21 | } 22 | 23 | return nil 24 | } 25 | -------------------------------------------------------------------------------- /src/search/receiver.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/hearchco/agent/src/search/result" 7 | ) 8 | // createReceiver passes every value received from valChan to concMap.AddOrUpgrade and calls wg.Done once valChan is closed and drained. 9 | func createReceiver[T any](wg *sync.WaitGroup, valChan chan T, concMap result.ConcMapper[T]) { 10 | // Signal that the receiver is done. 11 | defer wg.Done() 12 | 13 | for recVal := range valChan { 14 | concMap.AddOrUpgrade(recVal) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/search/result/construct.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | 7 | "github.com/hearchco/agent/src/search/engines" 8 | ) 9 | // ConstructResult validates the scraped fields and builds a WebScraped. It returns an error when urll is empty, fails url.Parse (the parse error is wrapped) or has no hostname, when title is empty, or when page or onPageRank is not positive. 10 | func ConstructResult(seName engines.Name, urll string, title string, description string, page int, onPageRank int) (WebScraped, error) { 11 | if urll == "" { 12 | return WebScraped{}, fmt.Errorf("invalid URL: empty") 13 | } 14 | 15 | u, err := url.Parse(urll) 16 | if err != nil { 17 | return WebScraped{}, fmt.Errorf("invalid URL: %w", err) 18 | } 19 | 20 | if u.Hostname() == "" { 21 | return WebScraped{}, fmt.Errorf("invalid URL: no hostname") 22 | } 23 | 24 | if title == "" { 25 | return WebScraped{}, fmt.Errorf("invalid title: empty") 26 | } 27 | 28 | if page <= 0 { 29 | return WebScraped{}, fmt.Errorf("invalid page: %d", page) 30 | } 31 | 32 | if onPageRank <= 0 { 33 | return WebScraped{}, fmt.Errorf("invalid onPageRank: %d", onPageRank) 34 | } 35 | 36 | return WebScraped{ 37 | url: u.String(), 38 | title: title, 39 | description: description, 40 | rank: RankScraped{ 41 | RankSimpleScraped{ 42
| searchEngine: seName, 43 | rank: 0, // This gets calculated when ranking the results. 44 | }, 45 | page, 46 | onPageRank, 47 | }, 48 | }, nil 49 | } 50 | /* ConstructImagesResult validates the web fields through ConstructResult, then requires positive original and thumbnail dimensions and non-empty thumbnailUrl and sourceUrl before building an ImagesScraped. sourceName is stored without validation. */ 51 | func ConstructImagesResult( 52 | seName engines.Name, urll string, title string, description string, page int, onPageRank int, 53 | originalHeight int, originalWidth int, thumbnailHeight int, thumbnailWidth int, 54 | thumbnailUrl string, sourceName string, sourceUrl string, 55 | ) (ImagesScraped, error) { 56 | res, err := ConstructResult(seName, urll, title, description, page, onPageRank) 57 | if err != nil { 58 | return ImagesScraped{}, err 59 | } 60 | 61 | if originalHeight <= 0 { 62 | return ImagesScraped{}, fmt.Errorf("invalid originalHeight: %d", originalHeight) 63 | } 64 | 65 | if originalWidth <= 0 { 66 | return ImagesScraped{}, fmt.Errorf("invalid originalWidth: %d", originalWidth) 67 | } 68 | 69 | if thumbnailHeight <= 0 { 70 | return ImagesScraped{}, fmt.Errorf("invalid thumbnailHeight: %d", thumbnailHeight) 71 | } 72 | 73 | if thumbnailWidth <= 0 { 74 | return ImagesScraped{}, fmt.Errorf("invalid thumbnailWidth: %d", thumbnailWidth) 75 | } 76 | 77 | if thumbnailUrl == "" { 78 | return ImagesScraped{}, fmt.Errorf("invalid thumbnailUrl: empty") 79 | } 80 | 81 | if sourceUrl == "" { 82 | return ImagesScraped{}, fmt.Errorf("invalid sourceUrl: empty") 83 | } 84 | 85 | return ImagesScraped{ 86 | WebScraped: res, 87 | 88 | originalSize: scrapedImageFormat{ 89 | height: originalHeight, 90 | width: originalWidth, 91 | }, 92 | thumbnailSize: scrapedImageFormat{ 93 | height: thumbnailHeight, 94 | width: thumbnailWidth, 95 | }, 96 | thumbnailURL: thumbnailUrl, 97 | sourceName: sourceName, 98 | sourceURL: sourceUrl, 99 | }, nil 100 | } 101 | -------------------------------------------------------------------------------- /src/search/result/interfaces.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | type Result interface { 4 | Key() string 5 |
URL() string 6 | FQDN() string 7 | Title() string 8 | Description() string 9 | SetDescription(string) 10 | Rank() int 11 | SetRank(int) 12 | Score() float64 13 | SetScore(float64) 14 | EngineRanks() []Rank 15 | InitEngineRanks() 16 | ShrinkEngineRanks() 17 | AppendEngineRanks(Rank) 18 | ConvertToOutput(string) ResultOutput 19 | Shorten(int, int) Result 20 | } 21 | /* ResultScraped is the read-only view of a freshly scraped result; Convert turns it into a Result and receives the capacity to reserve for engine ranks. */ 22 | type ResultScraped interface { 23 | Key() string 24 | URL() string 25 | Title() string 26 | Description() string 27 | Rank() RankScraped 28 | Convert(int) Result 29 | } 30 | /* ConcMapper is the sink a receiver feeds: AddOrUpgrade is called once for every received value. */ 31 | type ConcMapper[T any] interface { 32 | AddOrUpgrade(T) 33 | } 34 | -------------------------------------------------------------------------------- /src/search/result/output.go: -------------------------------------------------------------------------------- 1 | package result 2 | /* ResultOutput is the value produced by Result.ConvertToOutput; it is declared as any. */ 3 | type ResultOutput any 4 | /* ConvertToOutput calls ConvertToOutput(secret) on every result and returns the outputs in the same order. */ 5 | func ConvertToOutput(results []Result, secret string) []ResultOutput { 6 | var output = make([]ResultOutput, 0, len(results)) 7 | for _, r := range results { 8 | output = append(output, r.ConvertToOutput(secret)) 9 | } 10 | return output 11 | } 12 | /* ConvertSuggestionsToOutput returns the Value of every suggestion, in the same order. */ 13 | func ConvertSuggestionsToOutput(suggestions []Suggestion) []string { 14 | var output = make([]string, 0, len(suggestions)) 15 | for _, s := range suggestions { 16 | output = append(output, s.Value()) 17 | } 18 | return output 19 | } 20 | -------------------------------------------------------------------------------- /src/search/result/r_images.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/rs/zerolog/log" 7 | 8 | "github.com/hearchco/agent/src/utils/anonymize" 9 | ) 10 | /* Images is an image search result; it embeds imagesJSON, whose exported fields carry the JSON tags. */ 11 | type Images struct { 12 | imagesJSON 13 | } 14 | 15 | type imagesJSON struct { 16 | Web 17 | 18 | OriginalSize ImageFormat `json:"original"` 19 | ThumbnailSize ImageFormat `json:"thumbnail"` 20 | ThumbnailURL string `json:"thumbnail_url"` 21 | SourceName string `json:"source"` 22 |
SourceURL string `json:"source_url"` 23 | } 24 | /* ImageFormat holds the height and width of an image as serialized to JSON. */ 25 | type ImageFormat struct { 26 | Height int `json:"height"` 27 | Width int `json:"width"` 28 | } 29 | /* OriginalSize returns the stored original image dimensions; a zero height or width triggers log.Panic. */ 30 | func (r Images) OriginalSize() ImageFormat { 31 | if r.imagesJSON.OriginalSize.Height == 0 || r.imagesJSON.OriginalSize.Width == 0 { 32 | log.Panic(). 33 | Int("height", r.imagesJSON.OriginalSize.Height). 34 | Int("width", r.imagesJSON.OriginalSize.Width). 35 | Msg("OriginalSize is zero") 36 | // ^PANIC - Assert because the OriginalSize should never be zero. 37 | } 38 | 39 | return r.imagesJSON.OriginalSize 40 | } 41 | /* ThumbnailSize returns the stored thumbnail dimensions; a zero height or width triggers log.Panic. */ 42 | func (r Images) ThumbnailSize() ImageFormat { 43 | if r.imagesJSON.ThumbnailSize.Height == 0 || r.imagesJSON.ThumbnailSize.Width == 0 { 44 | log.Panic(). 45 | Int("height", r.imagesJSON.ThumbnailSize.Height). 46 | Int("width", r.imagesJSON.ThumbnailSize.Width). 47 | Msg("ThumbnailSize is zero") 48 | // ^PANIC - Assert because the ThumbnailSize should never be zero. 49 | } 50 | 51 | return r.imagesJSON.ThumbnailSize 52 | } 53 | /* ThumbnailURL returns the stored thumbnail URL; an empty value triggers log.Panic. */ 54 | func (r Images) ThumbnailURL() string { 55 | if r.imagesJSON.ThumbnailURL == "" { 56 | log.Panic().Msg("ThumbnailURL is empty") 57 | // ^PANIC - Assert because the ThumbnailURL should never be empty.
58 | } 59 | 60 | return r.imagesJSON.ThumbnailURL 61 | } 62 | /* SourceName returns the stored source name without any check. */ 63 | func (r Images) SourceName() string { 64 | return r.imagesJSON.SourceName 65 | } 66 | /* SourceURL returns the stored source URL without any check. */ 67 | func (r Images) SourceURL() string { 68 | return r.imagesJSON.SourceURL 69 | } 70 | /* ConvertToOutput wraps the result together with hash and timestamp pairs for its FQDN, URL and thumbnail URL, each computed by anonymize.CalculateHMACBase64 with the same secret and the same time.Now value. */ 71 | func (r Images) ConvertToOutput(secret string) ResultOutput { 72 | nowT := time.Now() 73 | fqdnHash, fqdnTimestamp := anonymize.CalculateHMACBase64(r.FQDN(), secret, nowT) 74 | urlHash, urlTimestamp := anonymize.CalculateHMACBase64(r.URL(), secret, nowT) 75 | thmbHash, thmbTimestamp := anonymize.CalculateHMACBase64(r.ThumbnailURL(), secret, nowT) 76 | 77 | return ImagesOutput{ 78 | imagesOutputJSON{ 79 | r, 80 | fqdnHash, 81 | fqdnTimestamp, 82 | urlHash, 83 | urlTimestamp, 84 | thmbHash, 85 | thmbTimestamp, 86 | }, 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/search/result/r_images_output.go: -------------------------------------------------------------------------------- 1 | package result 2 | /* ImagesOutput is the value returned by Images.ConvertToOutput; it embeds imagesOutputJSON. */ 3 | type ImagesOutput struct { 4 | imagesOutputJSON 5 | } 6 | /* imagesOutputJSON is the embedded Images plus the hash and timestamp fields filled in by Images.ConvertToOutput. */ 7 | type imagesOutputJSON struct { 8 | Images 9 | 10 | FqdnHash string `json:"fqdn_hash,omitempty"` 11 | FqdnHashTimestamp string `json:"fqdn_hash_timestamp,omitempty"` 12 | URLHash string `json:"url_hash,omitempty"` 13 | URLHashTimestamp string `json:"url_hash_timestamp,omitempty"` 14 | ThumbnailURLHash string `json:"thumbnail_url_hash,omitempty"` 15 | ThumbnailURLHashTimestamp string `json:"thumbnail_url_hash_timestamp,omitempty"` 16 | } 17 | -------------------------------------------------------------------------------- /src/search/result/r_images_scraped.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/utils/moreurls" 5 | "github.com/rs/zerolog/log" 6 | ) 7 | /* ImagesScraped is a WebScraped extended with image sizes, a thumbnail URL and source fields; ConstructImagesResult builds it. */ 8 | type ImagesScraped struct { 9 | WebScraped 10 | 11 | originalSize scrapedImageFormat 12 | thumbnailSize scrapedImageFormat 13 | thumbnailURL
string 14 | sourceName string 15 | sourceURL string 16 | } 17 | /* OriginalSize returns the scraped original image dimensions; a zero height or width triggers log.Panic. */ 18 | func (r ImagesScraped) OriginalSize() scrapedImageFormat { 19 | if r.originalSize.height == 0 || r.originalSize.width == 0 { 20 | log.Panic(). 21 | Int("height", r.originalSize.height). 22 | Int("width", r.originalSize.width). 23 | Msg("OriginalSize is zero") 24 | // ^PANIC - Assert because the OriginalSize should never be zero. 25 | } 26 | 27 | return r.originalSize 28 | } 29 | /* ThumbnailSize returns the scraped thumbnail dimensions; a zero height or width triggers log.Panic. */ 30 | func (r ImagesScraped) ThumbnailSize() scrapedImageFormat { 31 | if r.thumbnailSize.height == 0 || r.thumbnailSize.width == 0 { 32 | log.Panic(). 33 | Int("height", r.thumbnailSize.height). 34 | Int("width", r.thumbnailSize.width). 35 | Msg("ThumbnailSize is zero") 36 | // ^PANIC - Assert because the ThumbnailSize should never be zero. 37 | } 38 | 39 | return r.thumbnailSize 40 | } 41 | /* ThumbnailURL returns the scraped thumbnail URL; an empty value triggers log.Panic. */ 42 | func (r ImagesScraped) ThumbnailURL() string { 43 | if r.thumbnailURL == "" { 44 | log.Panic().Msg("ThumbnailURL is empty") 45 | // ^PANIC - Assert because the ThumbnailURL should never be empty.
46 | } 47 | 48 | return r.thumbnailURL 49 | } 50 | /* SourceName returns the scraped source name without any check. */ 51 | func (r ImagesScraped) SourceName() string { 52 | return r.sourceName 53 | } 54 | /* SourceURL returns the scraped source URL without any check. */ 55 | func (r ImagesScraped) SourceURL() string { 56 | return r.sourceURL 57 | } 58 | /* Convert builds a pointer to Images from the scraped values. Its EngineRanks slice has capacity erCap and holds only the converted rank of this result; FQDN comes from moreurls.FQDN applied to the URL. The getters used here panic on empty or zero values. */ 59 | func (r ImagesScraped) Convert(erCap int) Result { 60 | engineRanks := make([]Rank, 0, erCap) 61 | engineRanks = append(engineRanks, r.Rank().Convert()) 62 | return &Images{ 63 | imagesJSON{ 64 | Web{ 65 | webJSON{ 66 | URL: r.URL(), 67 | FQDN: moreurls.FQDN(r.URL()), 68 | Title: r.Title(), 69 | Description: r.Description(), 70 | EngineRanks: engineRanks, 71 | }, 72 | }, 73 | r.OriginalSize().Convert(), 74 | r.ThumbnailSize().Convert(), 75 | r.ThumbnailURL(), 76 | r.SourceName(), 77 | r.SourceURL(), 78 | }, 79 | } 80 | } 81 | /* scrapedImageFormat holds the height and width of a scraped image in unexported fields. */ 82 | type scrapedImageFormat struct { 83 | height int 84 | width int 85 | } 86 | /* GetHeight returns the height; a zero height triggers log.Panic. */ 87 | func (i scrapedImageFormat) GetHeight() int { 88 | if i.height == 0 { 89 | log.Panic().Msg("Height is zero") 90 | // ^PANIC - Assert because the Height should never be zero. 91 | } 92 | 93 | return i.height 94 | } 95 | /* GetWidth returns the width; a zero width triggers log.Panic. */ 96 | func (i scrapedImageFormat) GetWidth() int { 97 | if i.width == 0 { 98 | log.Panic().Msg("Width is zero") 99 | // ^PANIC - Assert because the Width should never be zero.
100 | } 101 | 102 | return i.width 103 | } 104 | /* Convert copies height and width into an exported ImageFormat, reading the fields directly without the zero checks of the getters. */ 105 | func (i scrapedImageFormat) Convert() ImageFormat { 106 | return ImageFormat{ 107 | Height: i.height, 108 | Width: i.width, 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/search/result/r_suggestion.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/rs/zerolog/log" 5 | ) 6 | /* Suggestion is a search suggestion; it embeds suggestionJSON, whose exported fields carry the JSON tags. */ 7 | type Suggestion struct { 8 | suggestionJSON 9 | } 10 | /* suggestionJSON is the serialized form of a Suggestion: value, rank, score and the per-engine ranks. */ 11 | type suggestionJSON struct { 12 | Value string `json:"value"` 13 | Rank int `json:"rank"` 14 | Score float64 `json:"score"` 15 | EngineRanks []RankSimple `json:"engine_ranks"` 16 | } 17 | /* Value returns the suggestion text. */ 18 | func (s Suggestion) Value() string { 19 | return s.suggestionJSON.Value 20 | } 21 | /* Rank returns the stored rank. */ 22 | func (s Suggestion) Rank() int { 23 | return s.suggestionJSON.Rank 24 | } 25 | /* SetRank overwrites the stored rank. */ 26 | func (s *Suggestion) SetRank(rank int) { 27 | s.suggestionJSON.Rank = rank 28 | } 29 | /* Score returns the stored score. */ 30 | func (s Suggestion) Score() float64 { 31 | return s.suggestionJSON.Score 32 | } 33 | /* SetScore overwrites the stored score. */ 34 | func (s *Suggestion) SetScore(score float64) { 35 | s.suggestionJSON.Score = score 36 | } 37 | /* EngineRanks returns the per-engine ranks; a nil slice triggers log.Panic. */ 38 | func (s Suggestion) EngineRanks() []RankSimple { 39 | if s.suggestionJSON.EngineRanks == nil { 40 | log.Panic().Msg("EngineRanks is nil") 41 | // ^PANIC - Assert because the EngineRanks should never be nil. 42 | } 43 | 44 | return s.suggestionJSON.EngineRanks 45 | } 46 | /* ShrinkEngineRanks reslices EngineRanks so that its capacity equals its length; a nil slice triggers log.Panic. */ 47 | func (s *Suggestion) ShrinkEngineRanks() { 48 | if s.suggestionJSON.EngineRanks == nil { 49 | log.Panic().Msg("EngineRanks is nil") 50 | // ^PANIC - Assert because the EngineRanks should never be nil.
51 | } 52 | 53 | ranksLen := len(s.suggestionJSON.EngineRanks) 54 | s.suggestionJSON.EngineRanks = s.suggestionJSON.EngineRanks[:ranksLen:ranksLen] 55 | } 56 | /* AppendEngineRanks appends rank to EngineRanks; a nil slice triggers log.Panic. */ 57 | func (s *Suggestion) AppendEngineRanks(rank RankSimple) { 58 | if s.suggestionJSON.EngineRanks == nil { 59 | log.Panic().Msg("EngineRanks is nil") 60 | // ^PANIC - Assert because the EngineRanks should never be nil. 61 | } 62 | 63 | s.suggestionJSON.EngineRanks = append(s.suggestionJSON.EngineRanks, rank) 64 | } 65 | -------------------------------------------------------------------------------- /src/search/result/r_suggestion_scraped.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | /* SuggestionScraped is a suggestion as scraped from one engine: its text and the rank that engine gave it. */ 7 | type SuggestionScraped struct { 8 | value string 9 | rank RankSimpleScraped 10 | } 11 | /* NewSuggestionScraped builds a SuggestionScraped from the text, the engine name and the rank; no validation is performed. */ 12 | func NewSuggestionScraped(value string, seName engines.Name, rank int) SuggestionScraped { 13 | r := NewRankSimpleScraped(seName, rank) 14 | return SuggestionScraped{ 15 | value, 16 | r, 17 | } 18 | } 19 | /* Key returns the suggestion text, which serves as its key. */ 20 | func (s SuggestionScraped) Key() string { 21 | return s.Value() 22 | } 23 | /* Value returns the suggestion text. */ 24 | func (s SuggestionScraped) Value() string { 25 | return s.value 26 | } 27 | /* Rank returns the scraped engine rank. */ 28 | func (s SuggestionScraped) Rank() RankSimpleScraped { 29 | return s.rank 30 | } 31 | /* Convert builds a Suggestion whose EngineRanks slice has capacity erCap and holds only the converted rank of this suggestion; Rank and Score keep their zero values. */ 32 | func (s SuggestionScraped) Convert(erCap int) Suggestion { 33 | engineRanks := make([]RankSimple, 0, erCap) 34 | engineRanks = append(engineRanks, s.Rank().Convert()) 35 | return Suggestion{ 36 | suggestionJSON{ 37 | Value: s.Value(), 38 | EngineRanks: engineRanks, 39 | }, 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/search/result/r_web_output.go: -------------------------------------------------------------------------------- 1 | package result 2 | /* WebOutput is the output form of a web result; it embeds webOutputJSON. */ 3 | type WebOutput struct { 4 | webOutputJSON 5 | } 6 | 7 | type webOutputJSON struct { 8 | Web 9 | 10 | FqdnHash string
`json:"fqdn_hash,omitempty"` 11 | FqdnHashTimestamp string `json:"fqdn_hash_timestamp,omitempty"` 12 | } 13 | -------------------------------------------------------------------------------- /src/search/result/r_web_scraped.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/utils/moreurls" 5 | "github.com/rs/zerolog/log" 6 | ) 7 | /* WebScraped is a web result as scraped from one engine: URL, title, description and the rank that engine gave it. ConstructResult builds it. */ 8 | type WebScraped struct { 9 | url string 10 | title string 11 | description string 12 | rank RankScraped 13 | } 14 | /* Key returns the URL, which serves as the key of the result. */ 15 | func (r WebScraped) Key() string { 16 | return r.URL() 17 | } 18 | /* URL returns the scraped URL; an empty value triggers log.Panic. */ 19 | func (r WebScraped) URL() string { 20 | if r.url == "" { 21 | log.Panic().Msg("url is empty") 22 | // ^PANIC - Assert because the url should never be empty. 23 | } 24 | 25 | return r.url 26 | } 27 | /* Title returns the scraped title; an empty value triggers log.Panic. */ 28 | func (r WebScraped) Title() string { 29 | if r.title == "" { 30 | log.Panic().Msg("title is empty") 31 | // ^PANIC - Assert because the title should never be empty.
32 | } 33 | 34 | return r.title 35 | } 36 | /* Description returns the scraped description, which may be empty. */ 37 | func (r WebScraped) Description() string { 38 | return r.description 39 | } 40 | /* Rank returns the scraped engine rank. */ 41 | func (r WebScraped) Rank() RankScraped { 42 | return r.rank 43 | } 44 | /* Convert builds a pointer to Web from the scraped values. Its EngineRanks slice has capacity erCap and holds only the converted rank of this result; FQDN comes from moreurls.FQDN applied to the URL. */ 45 | func (r WebScraped) Convert(erCap int) Result { 46 | engineRanks := make([]Rank, 0, erCap) 47 | engineRanks = append(engineRanks, r.Rank().Convert()) 48 | return &Web{ 49 | webJSON{ 50 | URL: r.URL(), 51 | FQDN: moreurls.FQDN(r.URL()), 52 | Title: r.Title(), 53 | Description: r.Description(), 54 | EngineRanks: engineRanks, 55 | }, 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/search/result/rank.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | /* Rank is the position one engine gave a result: the embedded RankSimple (engine and rank) plus page and on-page rank. */ 7 | type Rank struct { 8 | RankSimple 9 | 10 | rankJSON 11 | } 12 | /* rankJSON holds the serialized page and on-page rank fields of a Rank. */ 13 | type rankJSON struct { 14 | Page int `json:"page"` 15 | OnPageRank int `json:"on_page_rank"` 16 | } 17 | /* Page returns the stored page. */ 18 | func (r Rank) Page() int { 19 | return r.rankJSON.Page 20 | } 21 | /* SetPage overwrites both the page and the on-page rank. */ 22 | func (r *Rank) SetPage(page, onPageRank int) { 23 | r.rankJSON.Page = page 24 | r.rankJSON.OnPageRank = onPageRank 25 | } 26 | /* OnPageRank returns the stored on-page rank. */ 27 | func (r Rank) OnPageRank() int { 28 | return r.rankJSON.OnPageRank 29 | } 30 | /* SetOnPageRank overwrites only the on-page rank. */ 31 | func (r *Rank) SetOnPageRank(onPageRank int) { 32 | r.rankJSON.OnPageRank = onPageRank 33 | } 34 | /* UpgradeIfBetter adopts the position of newR when newR is on a lower page, or on the same page with a lower on-page rank. The embedded RankSimple is left untouched. */ 35 | func (r *Rank) UpgradeIfBetter(newR Rank) { 36 | if r.Page() > newR.Page() { 37 | r.SetPage(newR.Page(), newR.OnPageRank()) 38 | } else if r.Page() == newR.Page() && r.OnPageRank() > newR.OnPageRank() { 39 | r.SetOnPageRank(newR.OnPageRank()) 40 | } 41 | } 42 | /* NewRank builds a Rank from the engine name, rank, page and on-page rank; no validation is performed. */ 43 | func NewRank(searchEngine engines.Name, rank, page, onPageRank int) Rank { 44 | return Rank{ 45 | RankSimple{ 46 | rankSimpleJSON{ 47 | SearchEngine: searchEngine, 48 | Rank: rank, 49 | }, 50 | }, 51 | rankJSON{ 52 | page, 53 | onPageRank, 54 | }, 55 | } 56 | } 57 |
-------------------------------------------------------------------------------- /src/search/result/rank/filler.go: -------------------------------------------------------------------------------- 1 | package rank 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/hearchco/agent/src/search/engines" 7 | "github.com/hearchco/agent/src/search/result" 8 | ) 9 | 10 | // fillEngineRankRank groups pointers to the engine ranks of all results by search engine, sorts each group by Page and then OnPageRank, and sets Rank to the 1-based position inside the group. 11 | func (res Results) fillEngineRankRank() { 12 | seEngineRanks := make([][]*result.Rank, len(engines.NameValues())) 13 | 14 | for _, r := range res { 15 | for i := range r.EngineRanks() { 16 | er := &r.EngineRanks()[i] 17 | seEngineRanks[er.SearchEngine()] = append(seEngineRanks[er.SearchEngine()], er) 18 | } 19 | } 20 | 21 | for _, seer := range seEngineRanks { 22 | sort.Sort(ByPageAndOnPageRank(seer)) 23 | for i, er := range seer { 24 | er.SetRank(i + 1) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/search/result/rank/interfaces.go: -------------------------------------------------------------------------------- 1 | package rank 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | /* scoreRanker is anything that exposes a Score; ByScore sorts by it. */ 7 | type scoreRanker interface { 8 | Score() float64 9 | } 10 | /* scoreEngineRanker is a scoreRanker that also exposes its per-engine ranks; calculateScore takes it as input. */ 11 | type scoreEngineRanker[T ranker] interface { 12 | scoreRanker 13 | 14 | EngineRanks() []T 15 | } 16 | /* ranker is one per-engine rank: the engine name and the rank given by that engine. */ 17 | type ranker interface { 18 | SearchEngine() engines.Name 19 | Rank() int 20 | } 21 | -------------------------------------------------------------------------------- /src/search/result/rank/results.go: -------------------------------------------------------------------------------- 1 | package rank 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/hearchco/agent/src/search/category" 7 | "github.com/hearchco/agent/src/search/result" 8 | ) 9 | /* Results is a slice of results that can be scored and ranked in place. */ 10 | type Results []result.Result 11 | 12 | // Calculates the Score, sorts by it and then populates the Rank
field of every result. 13 | func (r Results) Rank(rconf category.Ranking) { 14 | // Fill Rank field for every EngineRank. 15 | r.fillEngineRankRank() 16 | 17 | // Calculate and set scores. 18 | r.calculateScores(rconf) 19 | 20 | // Sort slice by score. 21 | sort.Sort(ByScore[result.Result](r)) 22 | 23 | // Set correct ranks, by iterating over the sorted slice. 24 | r.correctRanks() 25 | } 26 | /* correctRanks sets Rank of every result to its 1-based position in the slice. */ 27 | func (r Results) correctRanks() { 28 | for i, res := range r { 29 | res.SetRank(i + 1) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/search/result/rank/score.go: -------------------------------------------------------------------------------- 1 | package rank 2 | 3 | import ( 4 | "math" 5 | 6 | "github.com/hearchco/agent/src/search/category" 7 | ) 8 | 9 | // calculateScores computes the score of every result with calculateScore and stores it through SetScore. 10 | func (r Results) calculateScores(rconf category.Ranking) { 11 | for _, res := range r { 12 | res.SetScore(calculateScore(res, rconf)) 13 | } 14 | } 15 | 16 | // calculateScores computes the score of every suggestion with calculateScore and stores it through SetScore. 17 | func (s Suggestions) calculateScores(rconf category.Ranking) { 18 | for i := range s { 19 | sug := &s[i] 20 | sug.SetScore(calculateScore(sug, rconf)) 21 | } 22 | } 23 | 24 | // calculateScore returns the average per-engine rank score of val plus a score based on how many engines returned it. 25 | func calculateScore[T ranker](val scoreEngineRanker[T], rconf category.Ranking) float64 { 26 | var rankScoreSum float64 = 0 27 | 28 | // Calculate the sum of the rank scores of all engines. 29 | // The rank score divides 100 by the adjusted rank to invert the priority (the lower the rank, the higher the score). 30 | for _, er := range val.EngineRanks() { 31 | eng := rconf.Engines[er.SearchEngine()] 32 | rankScoreSum += (100.0/math.Pow(float64(er.Rank())*rconf.RankMul+rconf.RankAdd, rconf.RankExp)*rconf.RankScoreMul+rconf.RankScoreAdd)*eng.Mul + eng.Add 33 | } 34 | 35 | // Calculate the average rank score from the sum. NOTE: with an empty EngineRanks slice this is 0/0, which is NaN in float64.
36 | rankScoreAvg := rankScoreSum / float64(len(val.EngineRanks())) 37 | 38 | // Calculate a second score based on the number of times the result was returned. 39 | // Log is used to make the score less sensitive to the number of times returned. 40 | timesReturnedScore := math.Log(float64(len(val.EngineRanks()))*rconf.TimesReturnedMul+rconf.TimesReturnedAdd)*100*rconf.TimesReturnedScoreMul + rconf.TimesReturnedScoreAdd 41 | 42 | return rankScoreAvg + timesReturnedScore 43 | } 44 | -------------------------------------------------------------------------------- /src/search/result/rank/sort.go: -------------------------------------------------------------------------------- 1 | package rank 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/rs/zerolog/log" 7 | 8 | "github.com/hearchco/agent/src/search/result" 9 | ) 10 | /* ByScore implements sort.Interface and orders elements by descending Score. */ 11 | type ByScore[T scoreRanker] []T 12 | 13 | func (r ByScore[T]) Len() int { return len(r) } 14 | func (r ByScore[T]) Swap(i, j int) { r[i], r[j] = r[j], r[i] } 15 | func (r ByScore[T]) Less(i, j int) bool { return r[i].Score() > r[j].Score() } 16 | /* ByPageAndOnPageRank implements sort.Interface and orders ranks by ascending Page, then ascending OnPageRank. Less panics when two compared ranks agree on both. */ 17 | type ByPageAndOnPageRank []*result.Rank 18 | 19 | func (r ByPageAndOnPageRank) Len() int { return len(r) } 20 | func (r ByPageAndOnPageRank) Swap(i, j int) { r[i], r[j] = r[j], r[i] } 21 | func (r ByPageAndOnPageRank) Less(i, j int) bool { 22 | if r[i].Page() != r[j].Page() { 23 | return r[i].Page() < r[j].Page() 24 | } 25 | 26 | if r[i].OnPageRank() != r[j].OnPageRank() { 27 | return r[i].OnPageRank() < r[j].OnPageRank() 28 | } 29 | 30 | log.Panic(). 31 | Caller(). 32 | Str("comparableA", fmt.Sprintf("%v", r[i])). 33 | Str("comparableB", fmt.Sprintf("%v", r[j])).
34 | Msg("Failed at ranking: same page and onpagerank") 35 | // ^PANIC 36 | /* Terminating statement required by the compiler; presumably unreachable because the log.Panic event above already panics - TODO confirm. */ 37 | panic("Failed at ranking: same page and onpagerank") 38 | } 39 | -------------------------------------------------------------------------------- /src/search/result/rank/structs_test.go: -------------------------------------------------------------------------------- 1 | package rank 2 | /* testPair couples an input Results slice with the Results expected after processing. */ 3 | type testPair struct { 4 | orig Results 5 | expected Results 6 | } 7 | -------------------------------------------------------------------------------- /src/search/result/rank/suggestions.go: -------------------------------------------------------------------------------- 1 | package rank 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/hearchco/agent/src/search/category" 7 | "github.com/hearchco/agent/src/search/result" 8 | ) 9 | /* Suggestions is a slice of suggestions that can be scored and ranked in place. */ 10 | type Suggestions []result.Suggestion 11 | 12 | // Rank calculates the Score of every suggestion, sorts by it and then populates the Rank field of every suggestion. 13 | func (s Suggestions) Rank(rconf category.Ranking) { 14 | // Calculate and set scores. 15 | s.calculateScores(rconf) 16 | 17 | // Sort slice by score. 18 | sort.Sort(ByScore[result.Suggestion](s)) 19 | 20 | // Set correct ranks, by iterating over the sorted slice.
21 | s.correctRanks() 22 | } 23 | /* correctRanks sets Rank of every suggestion to its 1-based position in the slice; elements are addressed by index so the slice itself is updated. */ 24 | func (s Suggestions) correctRanks() { 25 | for i := range s { 26 | sug := &s[i] 27 | sug.SetRank(i + 1) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/search/result/rank_scraped.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | /* RankScraped is the rank of a scraped result: the embedded RankSimpleScraped (engine and rank) plus page and on-page rank. */ 7 | type RankScraped struct { 8 | RankSimpleScraped 9 | 10 | page int 11 | onPageRank int 12 | } 13 | /* Page returns the scraped page. */ 14 | func (r RankScraped) Page() int { 15 | return r.page 16 | } 17 | /* OnPageRank returns the scraped on-page rank. */ 18 | func (r RankScraped) OnPageRank() int { 19 | return r.onPageRank 20 | } 21 | /* Convert builds a Rank from the converted RankSimpleScraped and the scraped page and on-page rank. */ 22 | func (r RankScraped) Convert() Rank { 23 | rankSimple := r.RankSimpleScraped.Convert() 24 | return Rank{ 25 | rankSimple, 26 | rankJSON{ 27 | r.page, 28 | r.onPageRank, 29 | }, 30 | } 31 | } 32 | /* NewRankScraped builds a RankScraped from the engine name, rank, page and on-page rank; no validation is performed. */ 33 | func NewRankScraped(searchEngine engines.Name, rank, page, onPageRank int) RankScraped { 34 | rankSimpleScraped := NewRankSimpleScraped(searchEngine, rank) 35 | return RankScraped{ 36 | rankSimpleScraped, 37 | page, 38 | onPageRank, 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/search/result/ranksimple.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | /* RankSimple is the rank one engine gave an item; it embeds rankSimpleJSON, whose exported fields carry the JSON tags. */ 7 | type RankSimple struct { 8 | rankSimpleJSON 9 | } 10 | /* rankSimpleJSON holds the serialized engine name and rank of a RankSimple. */ 11 | type rankSimpleJSON struct { 12 | SearchEngine engines.Name `json:"search_engine"` 13 | Rank int `json:"rank"` 14 | } 15 | /* SearchEngine returns the stored engine name. */ 16 | func (r RankSimple) SearchEngine() engines.Name { 17 | return r.rankSimpleJSON.SearchEngine 18 | } 19 | /* Rank returns the stored rank. */ 20 | func (r RankSimple) Rank() int { 21 | return r.rankSimpleJSON.Rank 22 | } 23 | /* SetRank overwrites the stored rank. */ 24 | func (r *RankSimple) SetRank(rank int) { 25 | r.rankSimpleJSON.Rank = rank 26 | } 27 | /* UpgradeIfBetter adopts the rank of newR when it is lower than the stored rank. */ 28 | func (r *RankSimple) UpgradeIfBetter(newR RankSimple) { 29 |
if r.Rank() > newR.Rank() { 30 | r.SetRank(newR.Rank()) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/search/result/ranksimple_scraped.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/hearchco/agent/src/search/engines" 5 | ) 6 | 7 | type RankSimpleScraped struct { 8 | searchEngine engines.Name 9 | rank int 10 | } 11 | 12 | func (r RankSimpleScraped) SearchEngine() engines.Name { 13 | return r.searchEngine 14 | } 15 | 16 | func (r RankSimpleScraped) Rank() int { 17 | return r.rank 18 | } 19 | 20 | func (r RankSimpleScraped) Convert() RankSimple { 21 | return RankSimple{ 22 | rankSimpleJSON{ 23 | r.searchEngine, 24 | r.rank, 25 | }, 26 | } 27 | } 28 | 29 | func NewRankSimpleScraped(searchEngine engines.Name, rank int) RankSimpleScraped { 30 | return RankSimpleScraped{ 31 | searchEngine, 32 | rank, 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/search/result/result_map.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "slices" 5 | "sync" 6 | 7 | "github.com/hearchco/agent/src/search/engines" 8 | "github.com/rs/zerolog/log" 9 | ) 10 | 11 | type ResultConcMap struct { 12 | enabledEnginesLen int 13 | titleLen, descLen int 14 | mutex sync.RWMutex 15 | mapp map[string]Result 16 | } 17 | 18 | func NewResultMap(enabledEnginesLen, titleLen, descLen int) ResultConcMap { 19 | return ResultConcMap{ 20 | enabledEnginesLen: enabledEnginesLen, 21 | titleLen: titleLen, 22 | descLen: descLen, 23 | mutex: sync.RWMutex{}, 24 | mapp: make(map[string]Result), 25 | } 26 | } 27 | 28 | // Passed as pointer because of the mutex. 29 | func (m *ResultConcMap) AddOrUpgrade(val ResultScraped) { 30 | if val.Rank().SearchEngine().String() == "" || val.Rank().SearchEngine() == engines.UNDEFINED { 31 | log.Panic(). 
32 | Str("engine", val.Rank().SearchEngine().String()). 33 | Msg("Received a result with an undefined search engine") 34 | // ^PANIC - Assert because it should never happen. 35 | } 36 | 37 | // Lock the map due to modifications. 38 | m.mutex.Lock() 39 | defer m.mutex.Unlock() 40 | 41 | mapVal, exists := m.mapp[val.Key()] 42 | if !exists { 43 | // Add the result to the map. 44 | m.mapp[val.Key()] = val.Convert(m.enabledEnginesLen) 45 | } else { 46 | var alreadyIn *Rank 47 | 48 | // Check if the engine rank is already in the result. 49 | for i, er := range mapVal.EngineRanks() { 50 | if val.Rank().SearchEngine() == er.SearchEngine() { 51 | alreadyIn = &mapVal.EngineRanks()[i] 52 | break 53 | } 54 | } 55 | 56 | // Update the result if the new rank is better. 57 | if alreadyIn == nil { 58 | mapVal.AppendEngineRanks(val.Rank().Convert()) 59 | } else { 60 | alreadyIn.UpgradeIfBetter(val.Rank().Convert()) 61 | } 62 | 63 | // Update the description if the new description is longer. 64 | if len(mapVal.Description()) < len(val.Description()) { 65 | mapVal.SetDescription(val.Description()) 66 | } 67 | } 68 | } 69 | 70 | // Passed as pointer because of the mutex. 
// shortString limits s to at most n characters (runes, not bytes).
//
// A negative n disables the limit and s is returned unchanged. When s has to
// be cut and n is larger than the suffix, the result ends in "..." and is
// exactly n characters long. When n is 3 or less there is no room for the
// suffix and s is simply cut to n characters.
func shortString(s string, n int) string {
	if n < 0 {
		return s
	}

	// Measure in runes. Comparing the byte length against n would truncate
	// multi-byte strings that already fit, and could even append the suffix to
	// a string that was not cut at all. The byte length is an upper bound on
	// the rune count, so it serves as a cheap first check.
	if len(s) <= n || len([]rune(s)) <= n {
		return s
	}

	suffix := "..."
	if n-len(suffix) <= 0 {
		suffix = "" // No room for suffix.
	}

	return firstNchars(s, n-len(suffix)) + suffix
}

// firstNchars returns the first n characters (runes) of str.
// The whole string is returned when n is negative or when str has at most n runes.
func firstNchars(str string, n int) string {
	v := []rune(str)
	if n < 0 || n >= len(v) {
		return str
	}
	return string(v[:n])
}
// Runner is the function shape runEngine expects from an engine: it takes the
// query, the search options and the channel on which items of type T are
// delivered, and returns the collected errors plus a flag that runEngine
// interprets as "something was scraped".
type Runner[T any] func(string, options.Options, chan T) ([]error, bool)

// runEngine executes runner for the engine engName inside onceWrap.Do and
// feeds everything the runner sends on the result channel into concMap.
//
// groupName, engName and the anonymized query are only used for logging.
// A non-empty error slice marks the wrapper as errored and a true "scraped"
// flag marks it as scraped. The call returns after the receiver goroutine has
// finished.
func runEngine[T any](groupName string, onceWrap *onceWrapper, concMap result.ConcMapper[T], engName engines.Name, runner Runner[T], query string, opts options.Options) {
	// Run the engine only once.
	onceWrap.Do(func() {
		// Create a buffered channel for the results.
		resChan := make(chan T, 100)

		// Start the receiver for the engine.
		// The WaitGroup lets this function block until the receiver is done.
		var receiver sync.WaitGroup
		receiver.Add(1)
		go createReceiver(&receiver, resChan, concMap)

		log.Trace().
			Str("engine", engName.String()).
			Str("query", anonymize.String(query)).
			Str("group", groupName).
			Msg("Started")

		// Run the engine.
		// NOTE(review): resChan is not closed in this function; presumably the
		// runner closes it so that the receiver can terminate - confirm.
		errs, scraped := runner(query, opts, resChan)

		// Errors do not abort the run: results may still have been scraped.
		if len(errs) > 0 {
			onceWrap.Errored()
			log.Error().
				Errs("errors", errs).
				Str("engine", engName.String()).
				Str("query", anonymize.String(query)).
				Str("group", groupName).
				Msg("Error searching")
		}

		if !scraped {
			log.Debug().
				Str("engine", engName.String()).
				Str("query", anonymize.String(query)).
				Str("group", groupName).
				Msg("Failed to scrape any results (probably timed out)")
		} else {
			onceWrap.Scraped()
		}

		// Wait for the receiver to finish.
		receiver.Wait()
	})
}
// GetName returns the Name field of the engine.
func (e EngineBase) GetName() engines.Name {
	return e.Name
}

// Used to get the origins of the search engine.
// GetOrigins returns the Origins slice itself, not a copy.
func (e EngineBase) GetOrigins() []engines.Name {
	return e.Origins
}

// Used to initialize the EngineBase collector.
// Init runs the on-request, on-response and on-error collector setup steps,
// in that order; only the on-request step receives ctx.
func (e *EngineBase) Init(ctx context.Context) {
	e.initCollectorOnRequest(ctx)
	e.initCollectorOnResponse()
	e.initCollectorOnError()
}

// Used to initialize the EngineBase collector for searching web/images.
// InitSearcher runs the searcher-specific collector setup first and then Init.
func (e *EngineBase) InitSearcher(ctx context.Context) {
	e.initCollectorSearcher(ctx)
	e.Init(ctx)
}

// Used to initialize the EngineBase collector for searching suggestions.
// InitSuggester runs the suggester-specific collector setup first and then Init.
func (e *EngineBase) InitSuggester(ctx context.Context) {
	e.initCollectorSuggester(ctx)
	e.Init(ctx)
}
// A goroutine-safe counter for PageRank.
// It keeps one atomic counter per page, addressed by the page index.
type PageRankCounter struct {
	counts []atomic.Int32
}

// Create a new PageRankCounter.
// NewPageRankCounter allocates pages counters, all starting at zero. The
// counter is returned by value; copies share the same underlying slice.
func NewPageRankCounter(pages int) PageRankCounter {
	return PageRankCounter{counts: make([]atomic.Int32, pages)}
}

// Increment the count for a page.
// page is used directly as a slice index, so it must be in [0, pages);
// an out-of-range value panics.
func (prc *PageRankCounter) Increment(page int) {
	prc.counts[page].Add(1)
}
23 | func (prc *PageRankCounter) GetPlusOne(page int) int { 24 | return int(prc.counts[page].Load() + 1) 25 | } 26 | -------------------------------------------------------------------------------- /src/search/scraper/parse/fields.go: -------------------------------------------------------------------------------- 1 | package parse 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/PuerkitoBio/goquery" 7 | "github.com/rs/zerolog/log" 8 | 9 | "github.com/hearchco/agent/src/search/engines" 10 | "github.com/hearchco/agent/src/search/scraper" 11 | ) 12 | 13 | // Fetches from DOM via dompaths. Returns url, title and description. 14 | func RawFieldsFromDOM(dom *goquery.Selection, dompaths scraper.DOMPaths, seName engines.Name) (string, string, string) { 15 | descText := dom.Find(dompaths.Description).Text() 16 | titleDom := dom.Find(dompaths.Title) 17 | titleText := titleDom.Text() 18 | 19 | // Title and URL selector are often the same. 20 | var linkDom *goquery.Selection 21 | if dompaths.URL == dompaths.Result { 22 | linkDom = titleDom 23 | } else { 24 | linkDom = dom.Find(dompaths.URL) 25 | } 26 | 27 | linkText, hrefExists := linkDom.Attr("href") 28 | if !hrefExists { 29 | log.Error(). 30 | Caller(). 31 | Str("engine", seName.String()). 32 | Str("url", linkText). 33 | Str("title", titleText). 34 | Str("description", descText). 35 | Msgf("Href attribute doesn't exist on matched URL element (%v)", dompaths.URL) 36 | 37 | return "", "", "" 38 | } 39 | 40 | return linkText, titleText, descText 41 | } 42 | 43 | // Fetches from DOM via dompaths and sanitizes. Returns url, title and description. 
44 | func FieldsFromDOM(dom *goquery.Selection, dompaths scraper.DOMPaths, seName engines.Name) (string, string, string) { 45 | return SanitizeFields(RawFieldsFromDOM(dom, dompaths, seName)) 46 | } 47 | 48 | func SanitizeURL(urlText string) string { 49 | return ParseURL(urlText) 50 | } 51 | 52 | func SanitizeTitle(titleText string) string { 53 | return ParseTextWithHTML(strings.TrimSpace(titleText)) 54 | } 55 | 56 | func SanitizeDescription(descText string) string { 57 | return ParseTextWithHTML(strings.TrimSpace(descText)) 58 | } 59 | 60 | func SanitizeFields(linkText string, titleText string, descText string) (string, string, string) { 61 | return SanitizeURL(linkText), SanitizeTitle(titleText), SanitizeDescription(descText) 62 | } 63 | -------------------------------------------------------------------------------- /src/search/scraper/parse/parse.go: -------------------------------------------------------------------------------- 1 | package parse 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | "strings" 7 | 8 | "github.com/PuerkitoBio/goquery" 9 | "github.com/rs/zerolog/log" 10 | "golang.org/x/net/html" 11 | ) 12 | 13 | func ParseURL(rawURL string) string { 14 | urll, err := parseURL(rawURL) 15 | if err != nil { 16 | log.Error(). 17 | Caller(). 18 | Err(err). 19 | Str("url", urll). 20 | Msg("Couldn't parse url") 21 | return rawURL 22 | } 23 | return urll 24 | } 25 | 26 | func parseURL(rawURL string) (string, error) { 27 | trimmedRawURL := strings.TrimSpace(rawURL) 28 | parsedURL, err := url.Parse(trimmedRawURL) 29 | if err != nil { 30 | return "", fmt.Errorf("parse.parseURL(): failed url.Parse() on url(%v). 
error: %w", rawURL, err) 31 | } 32 | 33 | urlString := parsedURL.String() 34 | if len(urlString) > 0 && urlString[len(urlString)-1] == '/' { 35 | urlString = urlString[:len(urlString)-1] 36 | } 37 | 38 | return urlString, nil 39 | } 40 | 41 | func ParseTextWithHTML(rawHTML string) string { 42 | text, err := parseTextWithHTML(rawHTML) 43 | if err != nil { 44 | log.Error(). 45 | Caller(). 46 | Err(err). 47 | Str("html", rawHTML). 48 | Msg("Failed parsing text with html") 49 | return rawHTML 50 | } 51 | return text 52 | } 53 | 54 | func parseTextWithHTML(rawHTML string) (string, error) { 55 | var result string = "" 56 | 57 | htmlNode, err := html.ParseFragment(strings.NewReader(rawHTML), nil) 58 | if err != nil { 59 | return "", fmt.Errorf("Failed html.ParseFragment on %v: %w", rawHTML, err) 60 | } 61 | 62 | for _, el := range htmlNode { 63 | sel := goquery.NewDocumentFromNode(el) 64 | result += sel.Text() 65 | } 66 | 67 | return result, nil 68 | } 69 | -------------------------------------------------------------------------------- /src/search/scraper/requests.go: -------------------------------------------------------------------------------- 1 | package scraper 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | 8 | "github.com/gocolly/colly/v2" 9 | "github.com/rs/zerolog/log" 10 | ) 11 | 12 | func (e EngineBase) Get(ctx *colly.Context, urll string, anonUrll string) error { 13 | log.Trace(). 14 | Str("engine", e.Name.String()). 15 | Str("url", anonUrll). 16 | Str("method", http.MethodGet). 17 | Msg("Making a new request") 18 | 19 | if err := e.collector.Request(http.MethodGet, urll, nil, ctx, nil); err != nil { 20 | return fmt.Errorf("%v: failed GET request to %v with %w", e.Name.String(), anonUrll, err) 21 | } 22 | 23 | return nil 24 | } 25 | 26 | func (e EngineBase) Post(ctx *colly.Context, urll string, body io.Reader, anonBody string) error { 27 | log.Trace(). 28 | Str("engine", e.Name.String()). 29 | Str("url", urll). 30 | Str("body", anonBody). 
// SuggestRespToSuggestions converts an OpenSearch-compatible suggestions API
// response to a slice of suggestions. The response must be a top-level JSON
// array whose second element is the array of suggestion strings.
//
// An error is returned when data is not valid JSON, when the top-level array
// has fewer than two elements, when the second element is not an array, when
// that array is empty, or when one of its items is not a string.
func SuggestRespToSuggestions(data []byte) ([]string, error) {
	// Decoded generically because the top-level array mixes element types.
	var resp []any

	// Unmarshal the JSON data.
	if err := json.Unmarshal(data, &resp); err != nil {
		return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
	}

	// Check the structure and extract the slice of strings.
	if len(resp) < 2 {
		return nil, fmt.Errorf("unexpected JSON structure")
	}

	// Assert the second element is a slice.
	strSlice, ok := resp[1].([]any)
	if !ok {
		return nil, fmt.Errorf("unexpected type for second element")
	}

	// Error if no suggestions returned.
	if len(strSlice) == 0 {
		return nil, fmt.Errorf("empty suggestions")
	}

	// Convert to slice of strings.
	suggs := make([]string, 0, len(strSlice))
	for _, item := range strSlice {
		sug, ok := item.(string)
		if !ok {
			return nil, fmt.Errorf("unexpected type in string slice")
		}
		suggs = append(suggs, sug)
	}

	return suggs, nil
}
// Name identifies a kind of search.
type Name string

// The search types known to FromString.
const (
	WEB         Name = "web"
	IMAGES      Name = "images"
	SUGGESTIONS Name = "suggestions"
)

// String returns the name as a plain string.
func (st Name) String() string {
	return string(st)
}

// FromString looks st up among the defined search type names and returns the
// matching Name. An unknown value yields an empty Name and an error.
func FromString(st string) (Name, error) {
	for _, known := range [...]Name{WEB, IMAGES, SUGGESTIONS} {
		if known.String() == st {
			return known, nil
		}
	}
	return "", fmt.Errorf("search type %q is not defined", st)
}
32 | Msg("Invalid version") 33 | // ^PANIC - This should never happen 34 | } 35 | 36 | const userAgentTemplate = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%d.0.0.0 Safari/537.36" 37 | userAgent := fmt.Sprintf(userAgentTemplate, version) 38 | if browser == "edge" { 39 | userAgent = fmt.Sprintf("%s Edg/%d.0.0.0", userAgent, version) 40 | } 41 | 42 | const secCHUATemplate = `"Chromium";v="%d", "Not;A=Brand";v="24", "%s";v="%d"` 43 | secCHUA := fmt.Sprintf(secCHUATemplate, version, "Google Chrome", version) 44 | if browser == "edge" { 45 | secCHUA = fmt.Sprintf(secCHUATemplate, version, "Microsoft Edge", version) 46 | } 47 | 48 | return userAgentWithHeaders{ 49 | userAgent, 50 | secCHUA, 51 | "?0", 52 | `"Windows"`, 53 | } 54 | } 55 | 56 | func randomUserAgentStruct() userAgentWithHeaders { 57 | // WARNING: Will stop working after year 2262. 58 | randSrc := rand.NewSource(time.Now().UnixNano()) 59 | randGen := rand.New(randSrc) 60 | return userAgentStruct(browsers[randGen.Intn(len(browsers))], versions[randGen.Intn(len(versions))]) 61 | } 62 | 63 | func RandomUserAgent() string { 64 | randomUA := randomUserAgentStruct() 65 | return randomUA.UserAgent 66 | } 67 | 68 | func RandomUserAgentWithHeaders() userAgentWithHeaders { 69 | return randomUserAgentStruct() 70 | } 71 | -------------------------------------------------------------------------------- /src/utils/anonymize/hash.go: -------------------------------------------------------------------------------- 1 | package anonymize 2 | 3 | import ( 4 | "crypto/hmac" 5 | "crypto/sha256" 6 | "encoding/base64" 7 | "fmt" 8 | "time" 9 | 10 | "github.com/hearchco/agent/src/utils/moretime" 11 | ) 12 | 13 | // Format used for the timestamps. 14 | const timestampFormat = time.RFC3339 15 | 16 | // Returns the hash of the message. 
17 | func CalculateHashBase64(message string) string { 18 | hasher := sha256.New() 19 | hasher.Write([]byte(message)) 20 | hashedBinary := hasher.Sum(nil) 21 | hashedString := base64.URLEncoding.EncodeToString(hashedBinary) 22 | return hashedString 23 | } 24 | 25 | // Returns the hash of the message and the timestamp used to generate it. 26 | func CalculateHMACBase64(message, key string, t time.Time) (string, string) { 27 | hasher := hmac.New(sha256.New, []byte(key)) 28 | timestamp := base64.URLEncoding.EncodeToString([]byte(t.Format(timestampFormat))) 29 | 30 | hasher.Write([]byte(timestamp)) 31 | hasher.Write([]byte(message)) 32 | hashedBinary := hasher.Sum(nil) 33 | 34 | hashedString := base64.URLEncoding.EncodeToString(hashedBinary) 35 | return hashedString, timestamp 36 | } 37 | 38 | // Returns whether the tag is valid for the given message, timestamp and key. 39 | func VerifyHMACBase64(tag, orig, key, timestampB64 string) (bool, error) { 40 | timestamp, err := base64.URLEncoding.DecodeString(timestampB64) 41 | if err != nil { 42 | return false, fmt.Errorf("error decoding timestamp: %v", err) 43 | } 44 | 45 | t, err := time.Parse(timestampFormat, string(timestamp)) 46 | if err != nil { 47 | return false, fmt.Errorf("error parsing timestamp: %v", err) 48 | } 49 | 50 | // TODO: Make duration of the timestamp configurable. 
51 | if time.Since(t) > moretime.Day { 52 | return false, nil 53 | } 54 | 55 | verificator, _ := CalculateHMACBase64(orig, key, t) 56 | return tag == verificator, nil 57 | } 58 | -------------------------------------------------------------------------------- /src/utils/anonymize/hash_test.go: -------------------------------------------------------------------------------- 1 | package anonymize 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestHashToSHA256B64(t *testing.T) { 8 | // original string, expected hash (sha256 returns binary and is encoded to base64) 9 | tests := []testPair{ 10 | {"", "47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU="}, 11 | {"banana death", "e8kN64XJ4Icr6Tl9VYrBRj50UJCPlyillODm3vVNk2g="}, 12 | {"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "LYwvbZeMohcStfbeNsnTH6jpak-l2P-LAYjfuefBcbs="}, 13 | {"Ćao hrčko!! 
// deduplicate removes duplicate characters (runes) from orig.
// The first occurrence of every character is kept, in its original order.
func deduplicate(orig string) string {
	// A builder avoids re-allocating the result for every appended character.
	var dedup strings.Builder
	dedup.Grow(len(orig))
	encountered := make(map[rune]struct{})

	for _, char := range orig {
		if _, seen := encountered[char]; seen {
			continue
		}
		encountered[char] = struct{}{}
		dedup.WriteRune(char)
	}

	return dedup.String()
}
51 | characters := strings.Split(orig, "") 52 | sort.Strings(characters) 53 | return strings.Join(characters, "") 54 | } 55 | -------------------------------------------------------------------------------- /src/utils/anonymize/string_test.go: -------------------------------------------------------------------------------- 1 | package anonymize 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestDeduplicate(t *testing.T) { 8 | // original string, expected deduplicated string 9 | tests := []testPair{ 10 | {"", ""}, 11 | {"gmail", "gmail"}, 12 | {"banana death", "ban deth"}, 13 | {"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "Lorem ipsudlta,cngbq.UvxDhfE"}, 14 | } 15 | 16 | for _, test := range tests { 17 | deduplicated := deduplicate(test.orig) 18 | if deduplicated != test.expected { 19 | t.Errorf("deduplicate(%q) = %q, want %q", test.orig, deduplicated, test.expected) 20 | } 21 | } 22 | } 23 | 24 | func TestSortString(t *testing.T) { 25 | // original string, sorted string 26 | tests := []testPair{ 27 | {"", ""}, 28 | {"gmail", "agilm"}, 29 | {"banana death", " aaaabdehnnt"}, 30 | { 31 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", 32 | " ,,.Laaaaaaabccccddddddddeeeeeeeeeeeggiiiiiiiiiiilllllmmmmmmnnnnnoooooooooopppqrrrrrrsssssstttttttttuuuuuu", 33 | }, 34 | } 35 | 36 | for _, test := range tests { 37 | sorted := sortString(test.orig) 38 | 39 | if sorted != test.expected { 40 | t.Errorf("SortString(%q) = %q, want %q", test.orig, sorted, test.expected) 41 | } 42 | } 43 | } 44 | 45 | func 
TestShuffle(t *testing.T) { 46 | // original string, sorted string 47 | tests := []testPair{ 48 | {"", ""}, 49 | {"gmail", "agilm"}, 50 | {"banana death", " aaaabdehnnt"}, 51 | { 52 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", 53 | " ,,.Laaaaaaabccccddddddddeeeeeeeeeeeggiiiiiiiiiiilllllmmmmmmnnnnnoooooooooopppqrrrrrrsssssstttttttttuuuuuu", 54 | }, 55 | } 56 | 57 | for _, test := range tests { 58 | shuffled := shuffle(test.orig) 59 | shuffledSorted := sortString(shuffled) 60 | 61 | if shuffledSorted != test.expected { 62 | t.Errorf("SortString(Shuffle(%q)) = %q, want %q", test.orig, shuffledSorted, test.expected) 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/utils/anonymize/structs_test.go: -------------------------------------------------------------------------------- 1 | package anonymize 2 | 3 | type testPair struct { 4 | orig string 5 | expected string 6 | } 7 | -------------------------------------------------------------------------------- /src/utils/gotypelimits/ints.go: -------------------------------------------------------------------------------- 1 | package gotypelimits 2 | 3 | // const MaxInt8 = int8(MaxUint8 >> 1) 4 | // const MinInt8 = -MaxInt8 - 1 5 | // const MaxInt16 = int16(MaxUint16 >> 1) 6 | // const MinInt16 = -MaxInt16 - 1 7 | // const MaxInt32 = int32(MaxUint32 >> 1) 8 | // const MinInt32 = -MaxInt32 - 1 9 | // const MaxInt64 = int64(MaxUint64 >> 1) 10 | // const MinInt64 = -MaxInt64 - 1 11 | const MaxInt = int(MaxUint >> 1) 12 | 13 | // const MinInt = -MaxInt - 1 14 | -------------------------------------------------------------------------------- /src/utils/gotypelimits/uints.go: -------------------------------------------------------------------------------- 1 | package gotypelimits 2 | 3 | // const MaxUint8 = ^uint8(0) 4 | // const MinUint8 = 0 5 | // const MaxUint16 = ^uint16(0) 6 | // 
const MinUint16 = 0 7 | // const MaxUint32 = ^uint32(0) 8 | // const MinUint32 = 0 9 | // const MaxUint64 = ^uint64(0) 10 | // const MinUint64 = 0 11 | const MaxUint = ^uint(0) 12 | 13 | // const MinUint = 0 14 | -------------------------------------------------------------------------------- /src/utils/kvpair/kvpair.go: -------------------------------------------------------------------------------- 1 | package kvpair 2 | 3 | import ( 4 | "net/url" 5 | 6 | "github.com/rs/zerolog/log" 7 | ) 8 | 9 | // KVPair struct, a simple key/value string pair. 10 | type KVPair struct { 11 | key string 12 | value string 13 | } 14 | 15 | // Constructs a new KVPair with provided key and value. 16 | func NewKVPair(k, v string) KVPair { 17 | kv := KVPair{k, v} 18 | kv.assert() 19 | return kv 20 | } 21 | 22 | // Private assert function to ensure key and value are not empty. 23 | // Panics if either key or value are empty. 24 | func (kv KVPair) assert() { 25 | if kv.key == "" || kv.value == "" { 26 | log.Panic(). 27 | Str("key", kv.key). 28 | Str("value", kv.value). 29 | Msg("Empty key or value in KVPair") 30 | // ^PANIC - Assert proper values in KVPair. 31 | } 32 | } 33 | 34 | // Returns the key. 35 | func (kv KVPair) Key() string { 36 | kv.assert() 37 | return kv.key 38 | } 39 | 40 | // Returns the value. 41 | func (kv KVPair) Value() string { 42 | kv.assert() 43 | return kv.value 44 | } 45 | 46 | // Sets the value. 47 | func (kv *KVPair) SetValue(v string) { 48 | kv.assert() 49 | kv.value = v 50 | kv.assert() 51 | } 52 | 53 | // Returns a copy of the KVPair. 54 | func (kv KVPair) Copy() KVPair { 55 | kv.assert() 56 | return NewKVPair(kv.key, kv.value) 57 | } 58 | 59 | // Returns raw KVPair in format "foo=bar". 60 | func (kv KVPair) String() string { 61 | kv.assert() 62 | return kv.key + "=" + kv.value 63 | } 64 | 65 | // Returns URL encoded KVPair in format "foo=bar". 66 | // Calls url.QueryEscape on both key and value. 
// JoinNonEmpty concatenates the non-empty elements of elems to create a single string.
// The beg string is placed at the beginning of the result, unless there are no
// non-empty elements, in which case the empty string is returned.
// The separator string sep is placed between elements in the resulting string.
func JoinNonEmpty(beg, sep string, elems ...string) string {
	nonEmptyElems := make([]string, 0, len(elems))
	for _, elem := range elems {
		if elem != "" {
			nonEmptyElems = append(nonEmptyElems, elem)
		}
	}

	// Nothing to join: the prefix is dropped as well.
	if len(nonEmptyElems) == 0 {
		return ""
	}

	// strings.Join returns a lone element unchanged, so a single element
	// needs no special case.
	return beg + strings.Join(nonEmptyElems, sep)
}
16 | Msg("Failed converting string to int") 17 | // ^PANIC 18 | } 19 | return int64(i) 20 | } 21 | 22 | func convertToDurationWithoutLastChar(s string) time.Duration { 23 | return time.Duration(handleAtoi(s[:len(s)-1])) 24 | } 25 | 26 | /* 27 | Converts the following to time.Duration: 28 | 29 | "1y" -> 1 year, 30 | "2M" -> 2 months, 31 | "3w" -> 3 weeks, 32 | "4d" -> 4 days, 33 | "5h" -> 5 hours, 34 | "6m" -> 6 minutes, 35 | "7s" -> 7 seconds, 36 | "8"-> 8 milliseconds 37 | */ 38 | func ConvertFromFancyTime(fancy string) time.Duration { 39 | switch fancy[len(fancy)-1] { 40 | case 'y': 41 | return convertToDurationWithoutLastChar(fancy) * Year 42 | case 'M': 43 | return convertToDurationWithoutLastChar(fancy) * Month 44 | case 'w': 45 | return convertToDurationWithoutLastChar(fancy) * Week 46 | case 'd': 47 | return convertToDurationWithoutLastChar(fancy) * Day 48 | case 'h': 49 | return convertToDurationWithoutLastChar(fancy) * time.Hour 50 | case 'm': 51 | return convertToDurationWithoutLastChar(fancy) * time.Minute 52 | case 's': 53 | return convertToDurationWithoutLastChar(fancy) * time.Second 54 | default: 55 | return time.Duration(handleAtoi(fancy)) * time.Millisecond 56 | } 57 | } 58 | 59 | // Converts to milliseconds. 
func ConvertToFancyTime(d time.Duration) string {
	// Format the int64 millisecond count directly: narrowing it to int first
	// would truncate anything above roughly 24.8 days on platforms where int
	// is 32 bits wide. Sub-millisecond remainders are dropped by Milliseconds.
	return strconv.FormatInt(d.Milliseconds(), 10)
}
24 | Msg("Failed to parse the URL") 25 | // ^PANIC - Assert correct URL. 26 | } 27 | 28 | // Check if the hostname is empty. 29 | h := u.Hostname() 30 | if h == "" { 31 | log.Panic(). 32 | Str("url", urll). 33 | Msg("Hostname is empty") 34 | // ^PANIC - Assert non-empty URL. 35 | } 36 | 37 | return h 38 | } 39 | --------------------------------------------------------------------------------