├── .bazelignore ├── .dockerignore ├── .github └── workflows │ ├── buf-breaking-check.yml │ ├── buf-format-check.sh │ ├── buf-generate-check.sh │ ├── buf-lint-check.sh │ ├── ci.yml │ └── semgrep.yml ├── .gitignore ├── .tool-versions ├── .vscode └── launch.json ├── AGENT.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile.indexserver ├── Dockerfile.webserver ├── LICENSE ├── README.md ├── SECURITY.md ├── all.bash ├── api.go ├── api_proto.go ├── api_proto_test.go ├── api_test.go ├── cmd ├── flags.go ├── zoekt-archive-index │ ├── flowrate.go │ └── main.go ├── zoekt-dynamic-indexserver │ ├── main.go │ └── main_test.go ├── zoekt-git-clone │ └── main.go ├── zoekt-git-index │ └── main.go ├── zoekt-index │ └── main.go ├── zoekt-indexserver │ ├── config.go │ └── main.go ├── zoekt-merge-index │ ├── main.go │ └── main_test.go ├── zoekt-mirror-bitbucket-server │ └── main.go ├── zoekt-mirror-gerrit │ └── main.go ├── zoekt-mirror-gitea │ └── main.go ├── zoekt-mirror-github │ └── main.go ├── zoekt-mirror-gitiles │ ├── cgit.go │ ├── gitiles.go │ └── main.go ├── zoekt-mirror-gitlab │ └── main.go ├── zoekt-repo-index │ └── main.go ├── zoekt-sourcegraph-indexserver │ ├── backoff.go │ ├── backoff_test.go │ ├── cleanup.go │ ├── cleanup_test.go │ ├── debug.go │ ├── default_grpc_service_configuration.json │ ├── grpc │ │ └── protos │ │ │ ├── README.md │ │ │ ├── buf.gen.yaml │ │ │ ├── buf.yaml │ │ │ ├── sourcegraph │ │ │ └── zoekt │ │ │ │ └── configuration │ │ │ │ └── v1 │ │ │ │ ├── configuration.pb.go │ │ │ │ ├── configuration.proto │ │ │ │ └── configuration_grpc.pb.go │ │ │ └── zoekt │ │ │ └── indexserver │ │ │ └── v1 │ │ │ ├── indexserver.pb.go │ │ │ ├── indexserver.proto │ │ │ └── indexserver_grpc.pb.go │ ├── index.go │ ├── index_mutex.go │ ├── index_test.go │ ├── json_schemas │ │ ├── CdsConfig.json │ │ ├── EdsLoadBalancingPolicyConfig.json │ │ ├── GrpcLbConfig.json │ │ ├── LeastRequestLocalityLoadBalancingPolicyConfig.json │ │ ├── LoadBalancingConfig.json │ │ ├── 
LrsLoadBalancingPolicyConfig.json │ │ ├── MethodConfig.json │ │ ├── OutlierDetectionLoadBalancingConfig.json │ │ ├── OverrideHostLoadBalancingPolicyConfig.json │ │ ├── PickFirstConfig.json │ │ ├── PriorityLoadBalancingPolicyConfig.json │ │ ├── RingHashLoadBalancingConfig.json │ │ ├── RlsLoadBalancingPolicyConfig.json │ │ ├── RoundRobinConfig.json │ │ ├── ServiceConfig.json │ │ ├── WeightedRoundRobinLbConfig.json │ │ ├── WeightedTargetLoadBalancingPolicyConfig.json │ │ ├── XdsClusterImplLoadBalancingPolicyConfig.json │ │ ├── XdsClusterManagerLoadBalancingPolicyConfig.json │ │ ├── XdsClusterResolverLoadBalancingPolicyConfig.json │ │ ├── XdsConfig.json │ │ ├── XdsServer.json │ │ ├── XdsWrrLocalityLoadBalancingPolicyConfig.json │ │ └── update.sh │ ├── main.go │ ├── main_test.go │ ├── merge.go │ ├── merge_test.go │ ├── meta.go │ ├── meta_test.go │ ├── owner.go │ ├── owner_test.go │ ├── purge.go │ ├── purge_test.go │ ├── queue.go │ ├── queue_test.go │ ├── sg.go │ └── sg_test.go ├── zoekt-test │ └── main.go ├── zoekt-webserver │ ├── grpc │ │ └── server │ │ │ ├── sampling.go │ │ │ ├── sampling_test.go │ │ │ ├── server.go │ │ │ └── server_test.go │ ├── main.go │ └── metrics.go └── zoekt │ └── main.go ├── ctag-overlay.nix ├── doc ├── ctags.md ├── design.md ├── faq.md ├── indexing.md ├── json-api.md └── query_syntax.md ├── flake.lock ├── flake.nix ├── gen-proto.sh ├── go.mod ├── go.sum ├── grpc ├── chunk │ ├── chunker.go │ └── chunker_test.go ├── defaults │ └── server.go ├── grpcutil │ ├── util.go │ └── util_test.go ├── internalerrs │ ├── common.go │ ├── common_test.go │ ├── logging.go │ └── prometheus.go ├── messagesize │ ├── messagesize.go │ ├── messagesize_test.go │ ├── prometheus.go │ └── prometheus_test.go ├── propagator │ └── propagator.go ├── protos │ ├── README.md │ ├── buf.gen.yaml │ ├── buf.yaml │ └── zoekt │ │ └── webserver │ │ └── v1 │ │ ├── query.pb.go │ │ ├── query.proto │ │ ├── webserver.pb.go │ │ ├── webserver.proto │ │ └── webserver_grpc.pb.go └── testprotos 
│ └── news │ └── v1 │ ├── buf.gen.yaml │ ├── news.pb.go │ └── news.proto ├── ignore ├── ignore.go └── ignore_test.go ├── index ├── bits.go ├── bits_test.go ├── btree.go ├── btree_test.go ├── builder.go ├── builder_test.go ├── contentprovider.go ├── contentprovider_test.go ├── ctags.go ├── ctags_test.go ├── document.go ├── eval.go ├── eval_test.go ├── file_category.go ├── file_category_test.go ├── hititer.go ├── hititer_test.go ├── index_test.go ├── indexdata.go ├── indexdata_test.go ├── indexfile.go ├── limit.go ├── limit_test.go ├── matchiter.go ├── matchiter_test.go ├── matchtree.go ├── matchtree_test.go ├── merge.go ├── merge_test.go ├── read.go ├── read_test.go ├── score.go ├── section.go ├── shard_builder.go ├── shard_builder_test.go ├── toc.go ├── tombstones.go ├── tombstones_test.go └── write.go ├── install-ctags-alpine.sh ├── internal ├── archive │ ├── archive.go │ ├── e2e_test.go │ └── index.go ├── ctags │ ├── parser.go │ ├── parser_bins.go │ ├── parser_test.go │ └── symbol_kind.go ├── debugserver │ ├── debug.go │ └── expvar.go ├── e2e │ ├── doc.go │ ├── e2e_index_test.go │ ├── e2e_rank_test.go │ ├── e2e_test.go │ ├── examples │ │ ├── example.bin │ │ ├── example.cc │ │ ├── example.java │ │ ├── example.kt │ │ ├── example.py │ │ ├── example.rb │ │ ├── example.scala │ │ ├── large_file.cc │ │ └── test_example.py │ ├── scoring_test.go │ └── testdata │ │ ├── Get_databaseuser.txt │ │ ├── InternalDoer.txt │ │ ├── Repository_metadata_Write_rbac.txt │ │ ├── WaitGroup.txt │ │ ├── assets_are_not_configured_for_this_binary.txt │ │ ├── bufio_buffer.txt │ │ ├── bufio_flush_writer.txt │ │ ├── bytes_buffer.txt │ │ ├── coverage_data_writer.txt │ │ ├── generate_unit_test.txt │ │ ├── graphql_type_User.txt │ │ ├── r_cody_sourcegraph_url.txt │ │ ├── rank_stats.txt │ │ ├── sourcegraphserver_docker_image_build.txt │ │ ├── test_server.txt │ │ ├── time_compare.txt │ │ └── zoekt_searcher.txt ├── gitindex │ ├── clone.go │ ├── clone_test.go │ ├── delete.go │ ├── delete_test.go │ ├── 
filter.go │ ├── ignore_test.go │ ├── index.go │ ├── index_test.go │ ├── repocache.go │ ├── repocache_test.go │ ├── submodule.go │ ├── submodule_test.go │ ├── tree.go │ └── tree_test.go ├── json │ ├── json.go │ └── json_test.go ├── languages │ ├── language.go │ └── language_test.go ├── mockSearcher │ └── mock_searcher.go ├── otlpenv │ └── otlpenv.go ├── profiler │ └── profiler.go ├── syntaxutil │ ├── README.md │ ├── alias_test.go │ ├── parse_test.go │ └── regexp.go ├── tenant │ ├── context.go │ ├── enforcement.go │ ├── grpc.go │ ├── internal │ │ ├── enforcement │ │ │ └── enforcement.go │ │ └── tenanttype │ │ │ ├── type.go │ │ │ └── type_test.go │ ├── query.go │ ├── systemtenant │ │ ├── systemtenant.go │ │ └── systemtenant_test.go │ └── tenanttest │ │ └── tenanttest.go ├── trace │ ├── middleware.go │ ├── opentracing.go │ └── trace.go └── tracer │ ├── jaeger.go │ ├── opentelemetry.go │ └── tracer.go ├── marshal.go ├── marshal_test.go ├── query ├── bits.go ├── doc.go ├── marshal.go ├── marshal_test.go ├── parse.go ├── parse_test.go ├── query.go ├── query_proto.go ├── query_proto_test.go ├── query_test.go ├── regexp.go └── regexp_test.go ├── search ├── aggregate.go ├── eval.go ├── eval_test.go ├── sched.go ├── sched_test.go ├── shards.go ├── shards_test.go ├── watcher.go └── watcher_test.go ├── shell.nix ├── testdata ├── backcompat │ └── static_toc_v16.00000.zoekt ├── fuzz │ └── Fuzz_RepoList_ProtoRoundTrip │ │ ├── 5f697656db1d7c3c │ │ └── aeb560833e6a2ff8 ├── gen-shards.sh ├── golden │ └── TestReadSearch │ │ ├── ctagsrepo_v16.00000.golden │ │ ├── ctagsrepo_v17.00000.golden │ │ ├── repo17_v17.00000.golden │ │ ├── repo2_v16.00000.golden │ │ └── repo_v16.00000.golden ├── repo │ └── main.go ├── repo2 │ └── main.go ├── search_result_1.pb └── shards │ ├── ctagsrepo_v16.00000.zoekt │ ├── ctagsrepo_v17.00000.zoekt │ ├── repo17_v17.00000.zoekt │ ├── repo2_v16.00000.zoekt │ └── repo_v16.00000.zoekt └── web ├── api.go ├── doc.go ├── e2e_test.go ├── server.go ├── server_test.go 
├── snippets.go ├── templates.go └── trace.go /.bazelignore: -------------------------------------------------------------------------------- 1 | .direnv 2 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | Dockerfile* 3 | .dockerignore 4 | -------------------------------------------------------------------------------- /.github/workflows/buf-breaking-check.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | types: 4 | - opened 5 | paths: 6 | - '*.proto' 7 | jobs: 8 | validate-protos: 9 | runs-on: ubuntu-latest 10 | steps: 11 | # Run `git checkout` 12 | - uses: actions/checkout@v2 13 | # Install the `buf` CLI 14 | - uses: bufbuild/buf-setup-action@v1 15 | # Run breaking change detection against the `main` branch 16 | - uses: bufbuild/buf-breaking-action@v1 17 | with: 18 | against: 'https://github.com/sourcegraph/zoekt.git#branch=main' 19 | -------------------------------------------------------------------------------- /.github/workflows/buf-format-check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." 4 | set -euo pipefail 5 | 6 | find . -name "*.proto" -not -path ".git" | while read -r proto_file; do 7 | buf format -w --path "$proto_file" 8 | done 9 | 10 | if ! git diff --exit-code; then 11 | echo "buf format produced changes, please run buf format -w and commit the changes" 12 | exit 1 13 | fi 14 | -------------------------------------------------------------------------------- /.github/workflows/buf-generate-check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." 4 | set -euo pipefail 5 | 6 | find . 
-name "buf.gen.yaml" -not -path ".git" | while read -r buf_yaml; do 7 | pushd "$(dirname "${buf_yaml}")" >/dev/null 8 | 9 | if ! buf generate; then 10 | echo "running buf generate on ${buf_yaml} failed, please examine the output and fix the issues" 11 | exit 1 12 | fi 13 | 14 | popd >/dev/null 15 | done 16 | 17 | if ! git diff --exit-code; then 18 | echo "buf generate produced changes in the above file(s), please run buf generate and commit the changes" 19 | exit 1 20 | fi 21 | 22 | if ! (git ls-files --others --exclude-standard . | tee >(grep -q .)); then 23 | echo "buf generate produced the above untracked file(s), please run buf generate and commit them" 24 | exit 1 25 | fi 26 | -------------------------------------------------------------------------------- /.github/workflows/buf-lint-check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." 4 | set -euo pipefail 5 | 6 | find . -name "buf.yaml" -not -path ".git" | while read -r buf_yaml; do 7 | pushd "$(dirname "${buf_yaml}")" >/dev/null 8 | 9 | if ! 
buf lint .; then 10 | echo "running buf lint on ${buf_yaml} failed, please examine the output and fix the issues" 11 | exit 1 12 | fi 13 | 14 | popd >/dev/null 15 | done 16 | -------------------------------------------------------------------------------- /.github/workflows/semgrep.yml: -------------------------------------------------------------------------------- 1 | name: Semgrep - SAST Scan 2 | 3 | on: 4 | pull_request_target: 5 | types: [ closed, edited, opened, synchronize, ready_for_review ] 6 | 7 | jobs: 8 | semgrep: 9 | permissions: 10 | contents: read # for actions/checkout to fetch code 11 | security-events: write # for github/codeql-action/upload-sarif to upload SARIF results 12 | actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status 13 | runs-on: ubuntu-latest 14 | container: 15 | image: returntocorp/semgrep 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | ref: ${{ github.event.pull_request.head.ref }} 21 | repository: ${{ github.event.pull_request.head.repo.full_name }} 22 | 23 | - name: Checkout semgrep-rules repo 24 | uses: actions/checkout@v4 25 | with: 26 | repository: sourcegraph/security-semgrep-rules 27 | token: ${{ secrets.GH_SEMGREP_SAST_TOKEN }} 28 | path: semgrep-rules 29 | 30 | - name: Run Semgrep SAST Scan 31 | run: | 32 | mv semgrep-rules ../ 33 | semgrep ci -f ../semgrep-rules/semgrep-rules/ --metrics=off --oss-only --suppress-errors --sarif -o results.sarif --exclude='semgrep-rules' --baseline-commit "$(git merge-base main HEAD)" || true 34 | - name: Upload SARIF file 35 | uses: github/codeql-action/upload-sarif@v3 36 | with: 37 | sarif_file: results.sarif -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | cmd/zoekt-index/zoekt-index 3 | cmd/zoekt-webserver/zoekt-webserver 4 | cmd/zoekt-mirror-github/zoekt-mirror-github 
5 | cmd/zoekt-server/zoekt-server 6 | cmd/zoekt-git-index/zoekt-git-index 7 | .envrc 8 | .idea 9 | .direnv 10 | bazel-bin 11 | bazel-out 12 | bazel-testlogs 13 | bazel-zoekt 14 | -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | golang 1.23.4 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Index folder", 9 | "type": "go", 10 | "request": "launch", 11 | "mode": "auto", 12 | "program": "cmd/zoekt-git-index", 13 | "cwd": "${workspaceFolder}", 14 | "args": ["-index", "${input:indexPath}", "${input:path}"] 15 | }, 16 | { 17 | "name": "Webserver", 18 | "type": "go", 19 | "request": "launch", 20 | "mode": "auto", 21 | "program": "cmd/zoekt-webserver", 22 | "cwd": "${workspaceFolder}", 23 | "args": ["-index", "${input:indexPath}"] 24 | }, 25 | { 26 | "name": "Attach to Process (from list)", 27 | "type": "go", 28 | "request": "attach", 29 | "mode": "local" 30 | } 31 | ], 32 | "inputs": [ 33 | { 34 | "id": "path", 35 | "description": "Please enter the path to the project to index", 36 | "default": "", 37 | "type": "promptString" 38 | }, 39 | { 40 | "id": "indexPath", 41 | "description": "Enter the path where indexes are stored", 42 | "default": "${userHome}/.zoekt", 43 | "type": "promptString" 44 | } 45 | ] 46 | } 47 | -------------------------------------------------------------------------------- /AGENT.md: -------------------------------------------------------------------------------- 1 | # Zoekt Coding Agent Guidelines 2 | 3 | ## 
Build & Test Commands 4 | - Build: `go build ./cmd/...` 5 | - Run all tests: `go test ./... -short` 6 | - Run a single test: `go test -run=TestName ./path/to/package` 7 | - Run specific test with verbose output: `go test -v -run=TestName ./path/to/package` 8 | - Benchmark: `go test -bench=BenchmarkName ./path/to/package` 9 | - Fuzzing: `go test -fuzz=FuzzTestName -fuzztime=30s ./package` 10 | - Smoke test: Check a specific repo: `go run ./cmd/zoekt-git-index /path/to/repo` 11 | 12 | ## Code Style Guidelines 13 | - Import format: standard Go imports (stdlib, external, internal) with alphabetical sorting 14 | - Error handling: explicit error checking with proper returns (no ignored errors) 15 | - Naming: Go standard (CamelCase for exported, camelCase for private) 16 | - Tests: Table-driven tests preferred with descriptive names 17 | - Documentation: All exported functions should have comments 18 | - Shell scripts: Use shfmt with `-i 2 -ci -bn` flags 19 | - Proto files: Run buf lint and format checks 20 | - Memory optimization: As a code search database, Zoekt is memory-sensitive - be conscious of struct field ordering and memory usage in core structures 21 | 22 | ## Documentation Resources 23 | - Design overview: `doc/design.md` - Core architecture and search methodology 24 | - Indexing details: `doc/indexing.md` - How the indexing process works 25 | - Query syntax: `doc/query_syntax.md` - Search query language reference 26 | - FAQ: `doc/faq.md` - Common questions and troubleshooting -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We welcome contributions to the project! To propose a change, please fork the repository, make your changes, then submit 4 | a pull request. If the change is significant or potentially controversial, please open an issue first to discuss it. 
5 | Zoekt does not require a CLA to contribute. 6 | 7 | Before opening a pull request, make sure that you have run the tests locally: 8 | ```sh 9 | go test ./... 10 | ``` 11 | 12 | It's also good to run a local smoke test for the relevant component. For example, if you've made changes in 13 | `zoekt-git-index`, you can try indexing a repository locally: 14 | ```sh 15 | go run ./cmd/zoekt-git-index /path/to/repo 16 | ``` 17 | 18 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23.4-alpine3.19 AS builder 2 | 3 | RUN apk add --no-cache ca-certificates 4 | 5 | ENV CGO_ENABLED=0 6 | WORKDIR /go/src/github.com/sourcegraph/zoekt 7 | 8 | # Cache dependencies 9 | COPY go.mod go.sum ./ 10 | RUN go mod download 11 | 12 | COPY . ./ 13 | ARG VERSION 14 | RUN go install -ldflags "-X github.com/sourcegraph/zoekt.Version=$VERSION" ./cmd/... 15 | 16 | FROM alpine:3.19 AS zoekt 17 | 18 | RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget 19 | 20 | COPY install-ctags-alpine.sh . 21 | RUN ./install-ctags-alpine.sh && rm install-ctags-alpine.sh 22 | 23 | COPY --from=builder /go/bin/* /usr/local/bin/ 24 | 25 | ENTRYPOINT ["/sbin/tini", "--"] 26 | -------------------------------------------------------------------------------- /Dockerfile.indexserver: -------------------------------------------------------------------------------- 1 | FROM alpine:3.19 2 | 3 | RUN apk add --no-cache ca-certificates bind-tools tini git jansson 4 | 5 | # Run as non-root user sourcegraph. External volumes should be mounted under /data (which will be owned by sourcegraph). 
6 | RUN mkdir -p /home/sourcegraph 7 | RUN addgroup -S sourcegraph && adduser -S -G sourcegraph -h /home/sourcegraph sourcegraph && mkdir -p /data && chown -R sourcegraph:sourcegraph /data 8 | USER sourcegraph 9 | WORKDIR /home/sourcegraph 10 | 11 | ENV SRC_FRONTEND_INTERNAL http://sourcegraph-frontend-internal 12 | ENV DATA_DIR /data/index 13 | RUN mkdir -p ${DATA_DIR} 14 | 15 | COPY --from=zoekt \ 16 | /usr/local/bin/universal-* \ 17 | /usr/local/bin/zoekt-sourcegraph-indexserver \ 18 | /usr/local/bin/zoekt-archive-index \ 19 | /usr/local/bin/zoekt-git-index \ 20 | /usr/local/bin/zoekt-merge-index \ 21 | /usr/local/bin/ 22 | 23 | ENTRYPOINT ["/sbin/tini", "--", "zoekt-sourcegraph-indexserver"] 24 | -------------------------------------------------------------------------------- /Dockerfile.webserver: -------------------------------------------------------------------------------- 1 | FROM alpine:3.19 2 | 3 | RUN apk add --no-cache ca-certificates bind-tools tini 4 | 5 | # Run as non-root user sourcegraph. External volumes should be mounted under /data (which will be owned by sourcegraph). 6 | RUN mkdir -p /home/sourcegraph 7 | RUN addgroup -S sourcegraph && adduser -S -G sourcegraph -h /home/sourcegraph sourcegraph && mkdir -p /data && chown -R sourcegraph:sourcegraph /data 8 | USER sourcegraph 9 | WORKDIR /home/sourcegraph 10 | 11 | ENV DATA_DIR /data/index 12 | RUN mkdir -p ${DATA_DIR} 13 | 14 | # We copy from the locally built zoekt image 15 | COPY --from=zoekt /usr/local/bin/zoekt-webserver /usr/local/bin/ 16 | 17 | # zoekt-webserver has a large stable heap size (10s of gigs), and as such the 18 | # default GOGC=100 could be better tuned. https://dave.cheney.net/tag/gogc 19 | # In go1.18 the GC changed significantly and from experimentation we tuned it 20 | # down from 50 to 25. 
21 | ENV GOGC=25 22 | 23 | ENTRYPOINT ["/sbin/tini", "--"] 24 | CMD zoekt-webserver -index $DATA_DIR -pprof -rpc -indexserver_proxy 25 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | Our security policy is documented at https://sourcegraph.com/security. 4 | -------------------------------------------------------------------------------- /all.bash: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | go test github.com/sourcegraph/zoekt/... 4 | go install github.com/sourcegraph/zoekt/cmd/... 5 | -------------------------------------------------------------------------------- /cmd/flags.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cmd 16 | 17 | import ( 18 | "flag" 19 | "fmt" 20 | "os" 21 | "path/filepath" 22 | 23 | "github.com/sourcegraph/zoekt/index" 24 | ) 25 | 26 | var ( 27 | version = flag.Bool("version", false, "Print version number") 28 | opts = &index.Options{} 29 | ) 30 | 31 | func init() { 32 | opts.Flags(flag.CommandLine) 33 | } 34 | 35 | func OptionsFromFlags() *index.Options { 36 | if *version { 37 | name := filepath.Base(os.Args[0]) 38 | fmt.Printf("%s version %q\n", name, index.Version) 39 | os.Exit(0) 40 | } 41 | 42 | opts.SetDefaults() 43 | return opts 44 | } 45 | -------------------------------------------------------------------------------- /cmd/zoekt-archive-index/flowrate.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "net" 7 | "net/http" 8 | 9 | "github.com/mxk/go-flowrate/flowrate" 10 | ) 11 | 12 | type connReadWriter struct { 13 | net.Conn 14 | 15 | Reader io.Reader 16 | Writer io.Writer 17 | } 18 | 19 | func (c *connReadWriter) Read(b []byte) (int, error) { 20 | return c.Reader.Read(b) 21 | } 22 | 23 | func (c *connReadWriter) Write(b []byte) (int, error) { 24 | return c.Writer.Write(b) 25 | } 26 | 27 | type dial func(ctx context.Context, network, addr string) (net.Conn, error) 28 | 29 | func limitDial(d dial, limit int64) dial { 30 | if limit <= 0 { 31 | return d 32 | } 33 | 34 | return func(ctx context.Context, network, addr string) (net.Conn, error) { 35 | conn, err := d(ctx, network, addr) 36 | if err != nil { 37 | return nil, err 38 | } 39 | return &connReadWriter{ 40 | Conn: conn, 41 | Reader: flowrate.NewReader(conn, limit), 42 | Writer: flowrate.NewWriter(conn, limit), 43 | }, nil 44 | } 45 | } 46 | 47 | func limitHTTPDefaultClient(limitMbps int64) { 48 | if limitMbps <= 0 { 49 | return 50 | } 51 | 52 | const megabit = 1000 * 1000 53 | limit := (limitMbps * megabit) / 8 54 | 55 | t := http.DefaultTransport.(*http.Transport) 56 | t.DialContext 
= limitDial(t.DialContext, limit) 57 | } 58 | -------------------------------------------------------------------------------- /cmd/zoekt-archive-index/main.go: -------------------------------------------------------------------------------- 1 | // Command zoekt-archive-index indexes a git archive. 2 | // 3 | // Examples using github.com: 4 | // 5 | // zoekt-archive-index -incremental -commit b57cb1605fd11ba2ecfa7f68992b4b9cc791934d -name github.com/gorilla/mux -strip_components 1 https://codeload.github.com/gorilla/mux/legacy.tar.gz/b57cb1605fd11ba2ecfa7f68992b4b9cc791934d 6 | // 7 | // zoekt-archive-index -branch master https://github.com/gorilla/mux/commit/b57cb1605fd11ba2ecfa7f68992b4b9cc791934d 8 | package main 9 | 10 | import ( 11 | "flag" 12 | "log" 13 | 14 | "go.uber.org/automaxprocs/maxprocs" 15 | 16 | "github.com/sourcegraph/zoekt/cmd" 17 | "github.com/sourcegraph/zoekt/internal/archive" 18 | ) 19 | 20 | func main() { 21 | var ( 22 | incremental = flag.Bool("incremental", true, "only index changed repositories") 23 | 24 | name = flag.String("name", "", "The repository name for the archive") 25 | urlRaw = flag.String("url", "", "The repository URL for the archive") 26 | branch = flag.String("branch", "", "The branch name for the archive") 27 | commit = flag.String("commit", "", "The commit sha for the archive. If incremental this will avoid updating shards already at commit") 28 | strip = flag.Int("strip_components", 0, "Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped.") 29 | 30 | downloadLimitMbps = flag.Int64("download-limit-mbps", 0, "If non-zero, limit archive downloads to specified amount in megabits per second") 31 | ) 32 | flag.Parse() 33 | 34 | // Tune GOMAXPROCS to match Linux container CPU quota. 
35 | _, _ = maxprocs.Set() 36 | 37 | log.SetFlags(log.LstdFlags | log.Lshortfile) 38 | 39 | if len(flag.Args()) != 1 { 40 | log.Fatal("expected argument for archive location") 41 | } 42 | archiveURL := flag.Args()[0] 43 | bopts := cmd.OptionsFromFlags() 44 | opts := archive.Options{ 45 | Incremental: *incremental, 46 | 47 | Archive: archiveURL, 48 | Name: *name, 49 | RepoURL: *urlRaw, 50 | Branch: *branch, 51 | Commit: *commit, 52 | Strip: *strip, 53 | } 54 | 55 | // Sourcegraph specific: Limit HTTP traffic 56 | limitHTTPDefaultClient(*downloadLimitMbps) 57 | 58 | if err := archive.Index(opts, *bopts); err != nil { 59 | log.Fatal(err) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /cmd/zoekt-git-clone/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Command zoekt-git-clone fetches all repos of a user or organization and clones 16 | // them. It is strongly recommended to get a personal API token from 17 | // https://github.com/settings/tokens, save the token in a file, and 18 | // point the --token option to it. 
19 | package main 20 | 21 | import ( 22 | "flag" 23 | "fmt" 24 | "log" 25 | "net/url" 26 | "os" 27 | "path/filepath" 28 | "strconv" 29 | "strings" 30 | 31 | "github.com/sourcegraph/zoekt/internal/gitindex" 32 | ) 33 | 34 | func main() { 35 | dest := flag.String("dest", "", "destination directory") 36 | nameFlag := flag.String("name", "", "name of repository") 37 | repoIDFlag := flag.Uint("repoid", 0, "id of repository") 38 | flag.Parse() 39 | 40 | if *dest == "" { 41 | log.Fatal("must set --dest") 42 | } 43 | if len(flag.Args()) == 0 { 44 | log.Fatal("must provide URL") 45 | } 46 | u, err := url.Parse(flag.Arg(0)) 47 | if err != nil { 48 | log.Fatalf("url.Parse: %v", err) 49 | } 50 | 51 | name := *nameFlag 52 | if name == "" { 53 | name = filepath.Join(u.Host, u.Path) 54 | name = strings.TrimSuffix(name, ".git") 55 | } 56 | 57 | destDir := filepath.Dir(filepath.Join(*dest, name)) 58 | if err := os.MkdirAll(destDir, 0o755); err != nil { 59 | log.Fatal(err) 60 | } 61 | 62 | config := map[string]string{ 63 | "zoekt.name": name, 64 | } 65 | 66 | repoID := *repoIDFlag 67 | if repoID != 0 { 68 | config["zoekt.repoid"] = strconv.FormatUint(uint64(repoID), 10) 69 | } 70 | 71 | destRepo, err := gitindex.CloneRepo(destDir, filepath.Base(name), u.String(), config) 72 | if err != nil { 73 | log.Fatalf("CloneRepo: %v", err) 74 | } 75 | if destRepo != "" { 76 | fmt.Println(destRepo) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /cmd/zoekt-mirror-gitiles/gitiles.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | "bytes" 19 | "encoding/json" 20 | "io" 21 | "net/http" 22 | "net/url" 23 | "path" 24 | ) 25 | 26 | type Project struct { 27 | Name string 28 | CloneURL string `json:"clone_url"` 29 | } 30 | 31 | func getGitilesRepos(root *url.URL, filter func(string) bool) (map[string]*crawlTarget, error) { 32 | jsRoot := *root 33 | jsRoot.RawQuery = "format=JSON" 34 | resp, err := http.Get(jsRoot.String()) 35 | if err != nil { 36 | return nil, err 37 | } 38 | defer resp.Body.Close() 39 | 40 | content, err := io.ReadAll(resp.Body) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | const xssTag = ")]}'\n" 46 | content = bytes.TrimPrefix(content, []byte(xssTag)) 47 | 48 | m := map[string]*Project{} 49 | if err := json.Unmarshal(content, &m); err != nil { 50 | return nil, err 51 | } 52 | 53 | result := map[string]*crawlTarget{} 54 | for k, v := range m { 55 | if k == "All-Users" || k == "All-Projects" { 56 | continue 57 | } 58 | if !filter(k) { 59 | continue 60 | } 61 | web := *root 62 | web.Path = path.Join(web.Path, v.Name) 63 | result[k] = &crawlTarget{ 64 | cloneURL: v.CloneURL, 65 | webURL: web.String(), 66 | webURLType: "gitiles", 67 | } 68 | } 69 | return result, nil 70 | } 71 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/backoff.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/sourcegraph/log" 7 | ) 8 | 9 
| type backoff struct { 10 | // maxBackoff is the longest duration we will backoff indexing operations of the given repo. 11 | maxBackoff time.Duration 12 | // backoffDuration is used to determine the duration of backoff. consecutiveFailures * backoffDuration calculates the 13 | // duration set on failed indexing attempt. 14 | backoffDuration time.Duration 15 | // consecutiveFailures is the count of preceding consecutive failures. 16 | consecutiveFailures int 17 | // backOffUntil is the earliest time when we allow the item to be pushed to the heap. Until then the item will not be enqueued 18 | // and indexing will not be attempted. 19 | backoffUntil time.Time 20 | } 21 | 22 | func (b *backoff) Allow(now time.Time) bool { 23 | return b.backoffUntil.Before(now) 24 | } 25 | 26 | func (b *backoff) Reset() { 27 | b.consecutiveFailures = 0 28 | b.backoffUntil = time.Unix(0, 0) 29 | } 30 | 31 | func (b *backoff) Fail(now time.Time, logger log.Logger, opts IndexOptions) { 32 | backoffDuration := time.Duration(b.consecutiveFailures+1) * b.backoffDuration 33 | 34 | if backoffDuration > b.maxBackoff { 35 | backoffDuration = b.maxBackoff 36 | } else { 37 | b.consecutiveFailures++ 38 | } 39 | b.backoffUntil = now.Add(backoffDuration) 40 | 41 | logger.Debug("Backoff subsequent attempts to index repository", 42 | log.String("repo", opts.Name), 43 | log.Uint32("id", opts.RepoID), 44 | log.Duration("backoff_duration", b.backoffDuration), 45 | log.Time("backoff_until", b.backoffUntil), 46 | ) 47 | } 48 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/default_grpc_service_configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "./json_schemas/ServiceConfig.json", 3 | "methodConfig": [ 4 | { 5 | "name": [ 6 | { 7 | "service": "configuration_service.v1.IndexedSearchConfigurationService" 8 | } 9 | ], 10 | 11 | "retryPolicy": { 12 | "maxAttempts": 4, 13 | 
"initialBackoff": "1s", 14 | "maxBackoff": "30s", 15 | "backoffMultiplier": 2, 16 | "retryableStatusCodes": [ 17 | "UNAVAILABLE", 18 | "ABORTED" 19 | ] 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/grpc/protos/README.md: -------------------------------------------------------------------------------- 1 | # Sourcegraph indexserver protobuf definitions 2 | 3 | This directory contains protobuf definitions for the indexserver gRPC API. 4 | 5 | To generate the Go code, run this script from the repository root: 6 | 7 | ```sh 8 | ./gen-proto.sh 9 | ``` 10 | 11 | Note: this script will regenerate all protos in the project, not just the ones in this directory. 12 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/grpc/protos/buf.gen.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for https://buf.build/, which we use for Protobuf code generation. 2 | version: v1 3 | plugins: 4 | - plugin: buf.build/protocolbuffers/go:v1.28.1 5 | out: . 6 | opt: 7 | - paths=source_relative 8 | - plugin: buf.build/grpc/go:v1.3.0 9 | out: . 
10 | opt: 11 | - paths=source_relative 12 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/grpc/protos/buf.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | breaking: 3 | use: 4 | - FILE 5 | lint: 6 | use: 7 | - DEFAULT -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package zoekt.indexserver.v1; 4 | 5 | import "sourcegraph/zoekt/configuration/v1/configuration.proto"; 6 | 7 | option go_package = "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/zoekt/indexserver/v1"; 8 | 9 | message DeleteAllDataRequest {} 10 | 11 | message DeleteAllDataResponse {} 12 | 13 | message DeleteRequest { 14 | // repo_ids are the Sourcegraph repository IDs to delete. 15 | repeated uint32 repo_ids = 1; 16 | } 17 | 18 | message DeleteResponse {} 19 | 20 | message IndexRequest { 21 | sourcegraph.zoekt.configuration.v1.ZoektIndexOptions options = 1; 22 | } 23 | 24 | message IndexResponse { 25 | // repo_id is the Sourcegraph repository ID. 26 | uint32 repo_id = 1; 27 | 28 | // branches is the list of branches that the client has indexed. 29 | repeated sourcegraph.zoekt.configuration.v1.ZoektRepositoryBranch branches = 2; 30 | 31 | // index_time_unix is the unix timestamp for when the index was created. 32 | int64 index_time_unix = 3; 33 | } 34 | 35 | service SourcegraphIndexserverService { 36 | // DeleteAllData deletes all data for the tenant in the request context. 37 | // This is used for pruning all data after a tenant has been deleted. 
38 | rpc DeleteAllData(DeleteAllDataRequest) returns (DeleteAllDataResponse) { 39 | option idempotency_level = IDEMPOTENT; 40 | } 41 | 42 | // Delete deletes the index for a specific repository. 43 | // This is used when a repository needs to be reindexed from scratch or when it's deleted. 44 | rpc Delete(DeleteRequest) returns (DeleteResponse) { 45 | option idempotency_level = IDEMPOTENT; 46 | } 47 | 48 | // Index indexes a repository with the given options and returns the repository information 49 | // including the index timestamp. This information is needed by the frontend to update its state 50 | // since Zoekt no longer sends status updates to Sourcegraph. 51 | rpc Index(IndexRequest) returns (IndexResponse) { 52 | option idempotency_level = IDEMPOTENT; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/index_mutex.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | "github.com/prometheus/client_golang/prometheus/promauto" 8 | ) 9 | 10 | // indexMutex is the concurrency control we have for operations that operate 11 | // on the index directory. We have two broad operations: global and repository 12 | // specific. A global operation is like a write lock on the whole directory. A 13 | // repository operation ensure we don't have multiple operations happening for 14 | // the same repository. 15 | type indexMutex struct { 16 | // indexMu protects state in index directory. global takes write lock, repo 17 | // takes read lock. 18 | indexMu sync.RWMutex 19 | 20 | // runningMu protects running. You need to first be holding indexMu. 21 | runningMu sync.Mutex 22 | 23 | // running maps by name since that is what we key by on disk. Once we start 24 | // keying by repo ID on disk, we should switch to uint32. 
25 | running map[string]struct{} 26 | } 27 | 28 | // With runs f if no other f with the same repoName is running. If f runs true 29 | // is returned, otherwise false is returned. 30 | // 31 | // With blocks if f runs or the Global lock is held. 32 | func (m *indexMutex) With(repoName string, f func()) bool { 33 | m.indexMu.RLock() 34 | defer m.indexMu.RUnlock() 35 | 36 | // init running; check and set running[repoName] 37 | m.runningMu.Lock() 38 | if m.running == nil { 39 | m.running = map[string]struct{}{} 40 | } 41 | _, alreadyRunning := m.running[repoName] 42 | m.running[repoName] = struct{}{} 43 | m.runningMu.Unlock() 44 | 45 | if alreadyRunning { 46 | metricIndexMutexAlreadyRunning.Inc() 47 | return false 48 | } 49 | 50 | // release running[repoName] 51 | defer func() { 52 | m.runningMu.Lock() 53 | delete(m.running, repoName) 54 | m.runningMu.Unlock() 55 | }() 56 | 57 | metricIndexMutexRepo.Inc() 58 | defer metricIndexMutexRepo.Dec() 59 | 60 | f() 61 | 62 | return true 63 | } 64 | 65 | // Global runs f once the global lock is held. IE no other Global or With f's 66 | // will be running. 
67 | func (m *indexMutex) Global(f func()) { 68 | metricIndexMutexGlobal.Inc() 69 | defer metricIndexMutexGlobal.Dec() 70 | 71 | m.indexMu.Lock() 72 | defer m.indexMu.Unlock() 73 | 74 | f() 75 | } 76 | 77 | var ( 78 | metricIndexMutexAlreadyRunning = promauto.NewCounter(prometheus.CounterOpts{ 79 | Name: "index_mutex_already_running_total", 80 | Help: "Total number of times we skipped processing a repository since an index was already running.", 81 | }) 82 | 83 | metricIndexMutexGlobal = promauto.NewGauge(prometheus.GaugeOpts{ 84 | Name: "index_mutex_global", 85 | Help: "The number of goroutines trying to or holding the global lock.", 86 | }) 87 | 88 | metricIndexMutexRepo = promauto.NewGauge(prometheus.GaugeOpts{ 89 | Name: "index_mutex_repository", 90 | Help: "The number of goroutines successfully holding a repo lock.", 91 | }) 92 | ) 93 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/json_schemas/CdsConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "$ref": "#/definitions/CdsConfig", 4 | "definitions": { 5 | "CdsConfig": { 6 | "properties": { 7 | "cluster": { 8 | "type": "string", 9 | "description": "Required." 10 | } 11 | }, 12 | "additionalProperties": true, 13 | "type": "object", 14 | "title": "Cds Config", 15 | "description": "Configuration for the cds LB policy." 
16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/json_schemas/LeastRequestLocalityLoadBalancingPolicyConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "$ref": "#/definitions/LeastRequestLocalityLoadBalancingPolicyConfig", 4 | "definitions": { 5 | "LeastRequestLocalityLoadBalancingPolicyConfig": { 6 | "properties": { 7 | "choiceCount": { 8 | "type": "string" 9 | } 10 | }, 11 | "additionalProperties": true, 12 | "type": "object", 13 | "title": "Least Request Locality Load Balancing Policy Config", 14 | "description": "Configuration for the least_request LB policy." 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/json_schemas/PickFirstConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "$ref": "#/definitions/PickFirstConfig", 4 | "definitions": { 5 | "PickFirstConfig": { 6 | "additionalProperties": true, 7 | "type": "object", 8 | "title": "Pick First Config", 9 | "description": "Configuration for pick_first LB policy." 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/json_schemas/RingHashLoadBalancingConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "$ref": "#/definitions/RingHashLoadBalancingConfig", 4 | "definitions": { 5 | "RingHashLoadBalancingConfig": { 6 | "properties": { 7 | "minRingSize": { 8 | "type": "string", 9 | "description": "A client-side option will cap these values to 4096. 
If either of these values are greater than the client-side cap, they will be treated as the client-side cap value. Optional, defaults to 1024, max 8M." 10 | }, 11 | "maxRingSize": { 12 | "type": "string", 13 | "description": "Optional, defaults to 4096, max 8M." 14 | } 15 | }, 16 | "additionalProperties": true, 17 | "type": "object", 18 | "title": "Ring Hash Load Balancing Config", 19 | "description": "Configuration for ring_hash LB policy." 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/json_schemas/RoundRobinConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "$ref": "#/definitions/RoundRobinConfig", 4 | "definitions": { 5 | "RoundRobinConfig": { 6 | "additionalProperties": true, 7 | "type": "object", 8 | "title": "Round Robin Config", 9 | "description": "Configuration for round_robin LB policy." 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/json_schemas/WeightedRoundRobinLbConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "$ref": "#/definitions/WeightedRoundRobinLbConfig", 4 | "definitions": { 5 | "WeightedRoundRobinLbConfig": { 6 | "properties": { 7 | "enableOobLoadReport": { 8 | "additionalProperties": true, 9 | "type": "boolean", 10 | "description": "Whether to enable out-of-band utilization reporting collection from the endpoints. By default, per-request utilization reporting is used." 11 | }, 12 | "oobReportingPeriod": { 13 | "pattern": "^([0-9]+\\.?[0-9]*|\\.[0-9]+)s$", 14 | "type": "string", 15 | "description": "Load reporting interval to request from the server. Note that the server may not provide reports as frequently as the client requests. 
Used only when enable_oob_load_report is true. Default is 10 seconds.", 16 | "format": "regex" 17 | }, 18 | "blackoutPeriod": { 19 | "pattern": "^([0-9]+\\.?[0-9]*|\\.[0-9]+)s$", 20 | "type": "string", 21 | "description": "A given endpoint must report load metrics continuously for at least this long before the endpoint weight will be used. This avoids churn when the set of endpoint addresses changes. Takes effect both immediately after we establish a connection to an endpoint and after weight_expiration_period has caused us to stop using the most recent load metrics. Default is 10 seconds.", 22 | "format": "regex" 23 | }, 24 | "weightExpirationPeriod": { 25 | "pattern": "^([0-9]+\\.?[0-9]*|\\.[0-9]+)s$", 26 | "type": "string", 27 | "description": "If a given endpoint has not reported load metrics in this long, then we stop using the reported weight. This ensures that we do not continue to use very stale weights. Once we stop using a stale value, if we later start seeing fresh reports again, the blackout_period applies. Defaults to 3 minutes.", 28 | "format": "regex" 29 | }, 30 | "weightUpdatePeriod": { 31 | "pattern": "^([0-9]+\\.?[0-9]*|\\.[0-9]+)s$", 32 | "type": "string", 33 | "description": "How often endpoint weights are recalculated. Default is 1 second.", 34 | "format": "regex" 35 | } 36 | }, 37 | "additionalProperties": true, 38 | "type": "object", 39 | "title": "Weighted Round Robin Lb Config", 40 | "description": "Configuration for weighted_round_robin LB policy." 41 | } 42 | } 43 | } -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/json_schemas/XdsServer.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "$ref": "#/definitions/XdsServer", 4 | "definitions": { 5 | "XdsServer": { 6 | "properties": { 7 | "server_uri": { 8 | "type": "string", 9 | "description": "Required." 
#!/usr/bin/env bash

# This script updates the JSON schemas in this directory by cloning the
# relevant protos from Google and gRPC, and then running protoc-gen-jsonschema
# on them.

# Enable strict mode before anything else runs. The script deletes every
# *.json file below the current directory further down, so a failed `cd` (or
# mktemp) must abort instead of letting that deletion run somewhere else.
set -euo pipefail

tmpdir="$(mktemp -d)"
function cleanup() {
  rm -rf "$tmpdir"
}

# Remove the scratch directory however the script exits.
trap cleanup EXIT

cd "$(dirname "${BASH_SOURCE[0]}")"

output_dir="$(pwd)"

if ! command -v protoc-gen-jsonschema &>/dev/null; then
  go install "github.com/chrusty/protoc-gen-jsonschema/cmd/protoc-gen-jsonschema@latest"
fi

# Delete all existing JSON schemas.
find . -name '*.json' -print0 | xargs -0 rm -f

git_clones_dir="${tmpdir}/clones"

mkdir -p "$git_clones_dir"
cd "$git_clones_dir"

# clone_at_commit REPO COMMIT DIR
# Fetches exactly COMMIT from REPO (depth 1) into DIR and checks it out.
function clone_at_commit() {
  local repo="$1"
  local commit="$2"
  local dir="$3"

  mkdir -p "$dir"

  pushd "$dir"

  git init
  git remote add origin "$repo"
  git fetch --depth 1 origin "$commit"
  git checkout FETCH_HEAD

  popd
}

# clone well-known protos from Google and gRPC protos
clone_at_commit "git@github.com:googleapis/googleapis.git" "c959f4214cb3947aa42ded4a14610d0607fcd57a" "${git_clones_dir}/googleapis"
clone_at_commit "git@github.com:grpc/grpc-proto.git" "6956c0ef3b8c21efb44992edc858fbae9414aa05" "${git_clones_dir}/grpc-proto"

cd "$tmpdir"

# prepare protos in a single directory
cp -r "${git_clones_dir}/googleapis/google" .
cp -r "${git_clones_dir}/grpc-proto/grpc" .
cp "${git_clones_dir}/grpc-proto/grpc/service_config/service_config.proto" .

# Generate JSON schemas from protos.

protoc \
  --jsonschema_opt=json_fieldnames \
  --jsonschema_out="$output_dir" \
  -I. \
  service_config.proto
37 | assertFailed(alice.Check()) // alice is not the owner anymore 38 | assertSuccess(bob.Check()) // bob is still the owner 39 | 40 | // Test what happens if someone corrupts the file 41 | if err := os.WriteFile(path, []byte("!corrupt"), 0o600); err != nil { 42 | t.Fatal(err) 43 | } 44 | assertFailed(alice.Check()) // corrupt so fail 45 | assertFailed(bob.Check()) // corrupt so fail 46 | assertSuccess(bob.Init()) // bob ovewrites corruption 47 | assertSuccess(bob.Check()) // bob is the owner 48 | assertFailed(alice.Check()) // alice is not the owner 49 | } 50 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/purge.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | 9 | "go.uber.org/multierr" 10 | 11 | "github.com/sourcegraph/zoekt/index" 12 | "github.com/sourcegraph/zoekt/internal/tenant" 13 | ) 14 | 15 | // purgeTenantShards removes all simple shards from dir on a best-effort basis. 16 | // It returns an error if there is no tenant in the context or if it encounters 17 | // an error while removing a shard. 18 | func purgeTenantShards(ctx context.Context, dir string) error { 19 | tnt, err := tenant.FromContext(ctx) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | d, err := os.Open(dir) 25 | if err != nil { 26 | return err 27 | } 28 | defer d.Close() 29 | 30 | names, err := d.Readdirnames(-1) 31 | if err != nil { 32 | return err 33 | } 34 | 35 | var merr error 36 | for _, n := range names { 37 | path := filepath.Join(dir, n) 38 | fi, err := os.Stat(path) 39 | if err != nil { 40 | merr = multierr.Append(merr, err) 41 | continue 42 | } 43 | if fi.IsDir() || filepath.Ext(path) != ".zoekt" { 44 | continue 45 | } 46 | 47 | // Skip compound shards. 
48 | if strings.HasPrefix(filepath.Base(path), "compound-") { 49 | continue 50 | } 51 | 52 | repos, _, err := index.ReadMetadataPath(path) 53 | if err != nil { 54 | merr = multierr.Append(merr, err) 55 | continue 56 | } 57 | // Since we excluded compound shards, we know there is exactly one repo 58 | if repos[0].TenantID == tnt.ID() { 59 | paths, err := index.IndexFilePaths(path) 60 | if err != nil { 61 | merr = multierr.Append(merr, err) 62 | continue 63 | } 64 | for _, p := range paths { 65 | if err := os.Remove(p); err != nil { 66 | merr = multierr.Append(merr, err) 67 | } 68 | } 69 | } 70 | } 71 | 72 | return merr 73 | } 74 | -------------------------------------------------------------------------------- /cmd/zoekt-sourcegraph-indexserver/sg_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | "testing/quick" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | "github.com/google/go-cmp/cmp/cmpopts" 9 | ) 10 | 11 | func TestIndexOptions_RoundTrip(t *testing.T) { 12 | var diff string 13 | f := func(original indexOptionsItem) bool { 14 | var converted indexOptionsItem 15 | converted.FromProto(original.ToProto()) 16 | 17 | options := []cmp.Option{ 18 | // The CloneURL field doesn't exist in the subset of fields that proto.ZoektIndexOptions contains. 
19 | cmpopts.IgnoreFields(indexOptionsItem{}, "CloneURL"), 20 | } 21 | 22 | if diff = cmp.Diff(original, converted, options...); diff != "" { 23 | return false 24 | } 25 | return true 26 | } 27 | 28 | if err := quick.Check(f, nil); err != nil { 29 | t.Errorf("indexOptionsItem diff (-want +got):\n%s", diff) 30 | } 31 | } 32 | 33 | func TestUpdateIndexStatusRequest_RoundTrip(t *testing.T) { 34 | var diff string 35 | f := func(original updateIndexStatusRequest) bool { 36 | var converted updateIndexStatusRequest 37 | converted.FromProto(original.ToProto()) 38 | 39 | options := []cmp.Option{ 40 | cmpopts.EquateEmpty(), 41 | } 42 | 43 | if diff = cmp.Diff(original, converted, options...); diff != "" { 44 | return false 45 | } 46 | return true 47 | } 48 | 49 | if err := quick.Check(f, nil); err != nil { 50 | t.Errorf("updateIndexStatusRequest diff (-want +got):\n%s", diff) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /cmd/zoekt-webserver/grpc/server/sampling.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "math" 5 | 6 | "github.com/sourcegraph/zoekt" 7 | ) 8 | 9 | // newSamplingSender is a zoekt.Sender that samples stats events to avoid 10 | // sending many empty stats events over the wire. 11 | func newSamplingSender(next zoekt.Sender) *samplingSender { 12 | return &samplingSender{next: next} 13 | } 14 | 15 | type samplingSender struct { 16 | next zoekt.Sender 17 | agg zoekt.SearchResult 18 | aggCount int 19 | } 20 | 21 | func (s *samplingSender) Send(event *zoekt.SearchResult) { 22 | // We don't want to send events over the wire if they don't contain file 23 | // matches. Hence, in case we didn't find any results, we aggregate the stats 24 | // and send them out in regular intervals. 
25 | if len(event.Files) == 0 { 26 | s.aggCount++ 27 | 28 | s.agg.Stats.Add(event.Stats) 29 | s.agg.Progress = event.Progress 30 | 31 | if s.aggCount%100 == 0 && !s.agg.Stats.Zero() { 32 | s.next.Send(&s.agg) 33 | s.agg = zoekt.SearchResult{} 34 | } 35 | 36 | return 37 | } 38 | 39 | // If we have aggregate stats, we merge them with the new event before sending 40 | // it. We drop agg.Progress, because we assume that event.Progress reflects the 41 | // latest status. 42 | if !s.agg.Stats.Zero() { 43 | event.Stats.Add(s.agg.Stats) 44 | s.agg = zoekt.SearchResult{} 45 | } 46 | 47 | s.next.Send(event) 48 | } 49 | 50 | // Flush sends any aggregated stats that we haven't sent yet 51 | func (s *samplingSender) Flush() { 52 | if !s.agg.Stats.Zero() { 53 | s.next.Send(&zoekt.SearchResult{ 54 | Stats: s.agg.Stats, 55 | Progress: zoekt.Progress{ 56 | Priority: math.Inf(-1), 57 | MaxPendingPriority: math.Inf(-1), 58 | }, 59 | }) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /cmd/zoekt-webserver/grpc/server/sampling_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/sourcegraph/zoekt" 7 | ) 8 | 9 | func TestSamplingStream(t *testing.T) { 10 | nonZeroStats := zoekt.Stats{ 11 | ContentBytesLoaded: 10, 12 | } 13 | filesEvent := &zoekt.SearchResult{ 14 | Files: make([]zoekt.FileMatch, 10), 15 | Stats: nonZeroStats, 16 | } 17 | fileEvents := func(n int) []*zoekt.SearchResult { 18 | res := make([]*zoekt.SearchResult, n) 19 | for i := range n { 20 | res[i] = filesEvent 21 | } 22 | return res 23 | } 24 | statsEvent := &zoekt.SearchResult{ 25 | Stats: nonZeroStats, 26 | } 27 | statsEvents := func(n int) []*zoekt.SearchResult { 28 | res := make([]*zoekt.SearchResult, n) 29 | for i := range n { 30 | res[i] = statsEvent 31 | } 32 | return res 33 | } 34 | cases := []struct { 35 | events []*zoekt.SearchResult 36 | beforeFlushCount 
int 37 | afterFlushCount int 38 | }{ 39 | // These test cases assume that the sampler only forwards 40 | // every 100 stats-only event. In case the sampling logic 41 | // changes, these tests are not valuable. 42 | {nil, 0, 0}, 43 | {fileEvents(1), 1, 1}, 44 | {fileEvents(2), 2, 2}, 45 | {fileEvents(200), 200, 200}, 46 | {append(fileEvents(1), statsEvents(1)...), 1, 2}, 47 | {append(fileEvents(1), statsEvents(2)...), 1, 2}, 48 | {append(fileEvents(1), statsEvents(99)...), 1, 2}, 49 | {append(fileEvents(1), statsEvents(100)...), 2, 2}, 50 | {statsEvents(500), 5, 5}, 51 | {statsEvents(501), 5, 6}, 52 | } 53 | 54 | for _, tc := range cases { 55 | count := 0 56 | ss := newSamplingSender(zoekt.SenderFunc(func(*zoekt.SearchResult) { 57 | count += 1 58 | })) 59 | 60 | for _, event := range tc.events { 61 | ss.Send(event) 62 | } 63 | if count != tc.beforeFlushCount { 64 | t.Fatalf("expected %d events, got %d", tc.beforeFlushCount, count) 65 | } 66 | ss.Flush() 67 | 68 | if count != tc.afterFlushCount { 69 | t.Fatalf("expected %d events, got %d", tc.afterFlushCount, count) 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /cmd/zoekt-webserver/metrics.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "path" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | "github.com/prometheus/procfs" 8 | sglog "github.com/sourcegraph/log" 9 | ) 10 | 11 | func mustRegisterMemoryMapMetrics(logger sglog.Logger) { 12 | logger = logger.Scoped("memoryMapMetrics") 13 | 14 | // The memory map metrics are collected via /proc, which 15 | // is only available on linux-based operating systems. 16 | 17 | // Instantiate shared FS objects for accessing /proc and /proc/self, 18 | // and skip metrics registration if we're aren't able to instantiate them 19 | // for whatever reason. 
20 | 21 | fs, err := procfs.NewDefaultFS() 22 | if err != nil { 23 | logger.Debug( 24 | "skipping registration", 25 | sglog.String("reason", "failed to initialize proc FS"), 26 | sglog.String("error", err.Error()), 27 | ) 28 | 29 | return 30 | } 31 | 32 | info, err := fs.Self() 33 | if err != nil { 34 | logger.Debug( 35 | "skipping registration", 36 | sglog.String("path", path.Join(procfs.DefaultMountPoint, "self")), 37 | sglog.String("reason", "failed to initialize process info object for current process"), 38 | sglog.String("error", err.Error()), 39 | ) 40 | 41 | return 42 | } 43 | 44 | // Register Prometheus memory map metrics 45 | 46 | prometheus.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{ 47 | Name: "proc_metrics_memory_map_max_limit", 48 | Help: "Upper limit on amount of memory mapped regions a process may have.", 49 | }, func() float64 { 50 | vm, err := fs.VM() 51 | if err != nil { 52 | logger.Debug( 53 | "failed to read virtual memory statistics for the current process", 54 | sglog.String("path", path.Join(procfs.DefaultMountPoint, "sys", "vm")), 55 | sglog.String("error", err.Error()), 56 | ) 57 | 58 | return 0 59 | } 60 | 61 | if vm.MaxMapCount == nil { 62 | return 0 63 | } 64 | 65 | return float64(*vm.MaxMapCount) 66 | })) 67 | 68 | prometheus.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{ 69 | Name: "proc_metrics_memory_map_current_count", 70 | Help: "Amount of memory mapped regions this process is currently using.", 71 | }, func() float64 { 72 | procMaps, err := info.ProcMaps() 73 | if err != nil { 74 | logger.Debug( 75 | "failed to read memory mappings for current process", 76 | sglog.String("path", path.Join(procfs.DefaultMountPoint, "self", "maps")), 77 | sglog.String("error", err.Error()), 78 | ) 79 | 80 | return 0 81 | } 82 | 83 | return float64(len(procMaps)) 84 | })) 85 | } 86 | -------------------------------------------------------------------------------- /ctag-overlay.nix: 
-------------------------------------------------------------------------------- 1 | self: super: rec { 2 | my-universal-ctags = super.universal-ctags.overrideAttrs (old: rec { 3 | version = "6.1.0"; 4 | src = super.fetchFromGitHub { 5 | owner = "universal-ctags"; 6 | repo = "ctags"; 7 | rev = "v${version}"; 8 | sha256 = "sha256-f8+Ifjn7bhSYozOy7kn+zCLdHGrH3iFupHUZEGynz9Y="; 9 | }; 10 | # disable checks, else we get `make[1]: *** No rule to make target 'optlib/cmake.c'. Stop.` 11 | doCheck = false; 12 | checkFlags = [ ]; 13 | }); 14 | 15 | # Skip building if same ctags version as registry 16 | universal-ctags = if super.universal-ctags.version == my-universal-ctags.version then super.universal-ctags else my-universal-ctags; 17 | } 18 | -------------------------------------------------------------------------------- /doc/ctags.md: -------------------------------------------------------------------------------- 1 | 2 | CTAGS 3 | ===== 4 | 5 | Ctags generates indices of symbol definitions in source files. It 6 | started its life as part of the BSD Unix, but there are several more 7 | modern flavors. Zoekt supports 8 | [universal-ctags](https://github.com/universal-ctags). 9 | 10 | It is strongly recommended to use Universal Ctags, [version 11 | `db3d9a6`](https://github.com/universal-ctags/ctags/commit/4ff09da9b0a36a9e75c92f4be05d476b35b672cd) 12 | or newer, running on the Linux platform. 13 | 14 | From this version on, universal ctags will be called using seccomp, 15 | which guarantees that security problems in ctags cannot escalate to 16 | access to the indexing machine. 17 | 18 | Ubuntu, Debian and Arch provide universal ctags with seccomp support 19 | compiled in. Zoekt expects the `universal-ctags` binary to be on 20 | `$PATH`. Note: only Ubuntu names the binary `universal-ctags`, while 21 | most distributions name it `ctags`. 
22 | 23 | Use the following invocation to compile and install universal-ctags: 24 | 25 | ``` 26 | sudo apt-get install \ 27 | pkg-config autoconf \ 28 | libseccomp-dev libseccomp \ 29 | libjansson-dev libjansson 30 | 31 | ./autogen.sh 32 | LDFLAGS=-static ./configure --enable-json --enable-seccomp 33 | make -j4 34 | 35 | # create tarball 36 | NAME=ctags-$(date --iso-8601=minutes | tr -d ':' | sed 's|\+.*$||')-$(git show --pretty=format:%h -q) 37 | mkdir ${NAME} 38 | cp ctags ${NAME}/universal-ctags 39 | tar zcf ${NAME}.tar.gz ${NAME}/ 40 | ``` 41 | -------------------------------------------------------------------------------- /doc/indexing.md: -------------------------------------------------------------------------------- 1 | 2 | # Configuration parameters 3 | 4 | Parameters are in the `zoekt` section of the git-config. 5 | 6 | * `name`: name of the repository, typically HOST/PATH, eg. `github.com/hanwen/usb`. 7 | 8 | * `web-url`: base URL for linking to files, commits, and the repository, eg. 9 | `https://github.com/hanwen/usb` 10 | 11 | * `web-url-type`: type of URL, eg. github. Supported are github, 12 | gitiles, gitweb, cgit and gitea. 13 | 14 | * `github-stars`, `github-forks`, `github-watchers`, 15 | `github-subscribers`: counters for github interactions 16 | 17 | ## Examples 18 | 19 | ### gitea 20 | 21 | Clone a remote repository and add the indexer configuration.
22 | 23 | ```sh 24 | git clone --bare https://codeberg.org/Codeberg/gitea 25 | cd gitea.git 26 | git config zoekt.web-url-type gitea 27 | git config zoekt.web-url https://codeberg.org/Codeberg/gitea 28 | git config zoekt.name codeberg.org/Codeberg/gitea 29 | ``` 30 | 31 | The tail of the git *config* should then contain: 32 | 33 | ```ini 34 | [zoekt] 35 | web-url-type = gitea 36 | web-url = https://codeberg.org/Codeberg/gitea 37 | name = codeberg.org/Codeberg/gitea 38 | ``` 39 | 40 | The *gitea.git* repository can then be indexed with `zoekt-git-index` 41 | 42 | ```sh 43 | zoekt-git-index --branches main -index /data/index -repo_cache /data/repos gitea.git 44 | ``` 45 | -------------------------------------------------------------------------------- /doc/json-api.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | When running `zoekt-webserver` with the `-rpc` option there will be a JSON HTTP API available for searches at `/api/search`: 4 | 5 | ``` 6 | curl -XPOST -d '{"Q":"needle"}' 'http://127.0.0.1:6070/api/search' 7 | ``` 8 | 9 | ## Filtering by repository IDs 10 | 11 | If your projects are indexed with a `repoid` (added automatically by some 12 | indexers) then you can filter your searches to a subset of repositories 13 | efficiently using the `RepoIDs` filter: 14 | 15 | ``` 16 | curl -XPOST -d '{"Q":"needle","RepoIDs":[1234,4567]}' 'http://34.120.239.98/api/search' 17 | ``` 18 | 19 | ## Options 20 | 21 | There are multiple options that can be passed under `Opts` which can also be 22 | found at 23 | [SearchOptions](https://github.com/sourcegraph/zoekt/blob/58cf4748830ac0eded1517cc8c2454694c531fbd/api.go#L470). 
24 | 25 | ``` 26 | curl -XPOST -d '{"Q":"needle","Opts":{"EstimateDocCount":true,"NumContextLines":10}}' 'http://34.120.239.98/api/search' 27 | ``` 28 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "nixpkgs": { 4 | "locked": { 5 | "lastModified": 1736798957, 6 | "narHash": "sha256-qwpCtZhSsSNQtK4xYGzMiyEDhkNzOCz/Vfu4oL2ETsQ=", 7 | "owner": "NixOS", 8 | "repo": "nixpkgs", 9 | "rev": "9abb87b552b7f55ac8916b6fc9e5cb486656a2f3", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "id": "nixpkgs", 14 | "ref": "nixos-unstable", 15 | "type": "indirect" 16 | } 17 | }, 18 | "root": { 19 | "inputs": { 20 | "nixpkgs": "nixpkgs" 21 | } 22 | } 23 | }, 24 | "root": "root", 25 | "version": 7 26 | } 27 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "The Zoekt developer environment Nix Flake"; 3 | 4 | inputs = { nixpkgs.url = "nixpkgs/nixos-unstable"; }; 5 | 6 | outputs = { self, nixpkgs }: { 7 | devShells = nixpkgs.lib.genAttrs [ 8 | "x86_64-linux" 9 | "aarch64-linux" 10 | "aarch64-darwin" 11 | "x86_64-darwin" 12 | ] (system: 13 | let 14 | pkgs = import nixpkgs { 15 | inherit system; 16 | overlays = [ self.overlays.ctags ]; 17 | }; 18 | in { default = import ./shell.nix { inherit pkgs; }; }); 19 | # Pin a specific version of universal-ctags to the same version as in ./install-ctags-alpine.sh. 20 | overlays.ctags = import ./ctag-overlay.nix; 21 | }; 22 | } 23 | -------------------------------------------------------------------------------- /gen-proto.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd "$(dirname "${BASH_SOURCE[0]}")" 4 | set -euo pipefail 5 | 6 | find . 
-name "buf.gen.yaml" -not -path ".git" | while read -r buf_yaml; do 7 | pushd "$(dirname "${buf_yaml}")" >/dev/null 8 | 9 | if ! buf generate; then 10 | echo "failed to generate ${buf_yaml}" >&2 11 | exit 1 12 | fi 13 | 14 | popd >/dev/null 15 | done 16 | -------------------------------------------------------------------------------- /grpc/chunk/chunker.go: -------------------------------------------------------------------------------- 1 | // Package chunk provides a utility for sending sets of protobuf messages in 2 | // groups of smaller chunks. This is useful for gRPC, which has limitations around the maximum 3 | // size of a message that you can send. 4 | // 5 | // This code is adapted from the gitaly project, which is licensed 6 | // under the MIT license. A copy of that license text can be found at 7 | // https://mit-license.org/. 8 | // 9 | // The code this file was based off can be found here: https://gitlab.com/gitlab-org/gitaly/-/blob/v16.2.0/internal/helper/chunk/chunker.go 10 | package chunk 11 | 12 | import ( 13 | "google.golang.org/protobuf/proto" 14 | ) 15 | 16 | // New returns a new Chunker that will use the given sendFunc to send chunks of messages. 17 | func New[T proto.Message](sendFunc func([]T) error) *Chunker[T] { 18 | return &Chunker[T]{sendFunc: sendFunc} 19 | } 20 | 21 | // Chunker lets you spread items you want to send over multiple chunks. 22 | // This type is not thread-safe. 23 | type Chunker[T proto.Message] struct { 24 | sendFunc func([]T) error // sendFunc is the function that will be invoked when a chunk is ready to be sent. 25 | 26 | buffer []T // buffer stores the items that will be sent when the sendFunc is invoked. 27 | sizeBytes int // sizeBytes is the size of the current chunk in bytes. 28 | } 29 | 30 | // maxMessageSize is the maximum size per protobuf message 31 | const maxMessageSize = 1 * 1024 * 1024 // 1 MiB 32 | 33 | // Send will append the provided items to the current chunk, and send the chunk if it is full. 
34 | // 35 | // Callers should ensure that they call Flush() after the last call to Send(). 36 | func (c *Chunker[T]) Send(items ...T) error { 37 | for _, item := range items { 38 | if err := c.sendOne(item); err != nil { 39 | return err 40 | } 41 | } 42 | 43 | return nil 44 | } 45 | 46 | func (c *Chunker[T]) sendOne(item T) error { 47 | itemSize := proto.Size(item) 48 | 49 | if itemSize+c.sizeBytes >= maxMessageSize { 50 | if err := c.sendResponseMsg(); err != nil { 51 | return err 52 | } 53 | } 54 | 55 | c.buffer = append(c.buffer, item) 56 | c.sizeBytes += itemSize 57 | 58 | return nil 59 | } 60 | 61 | func (c *Chunker[T]) sendResponseMsg() error { 62 | c.sizeBytes = 0 63 | 64 | err := c.sendFunc(c.buffer) 65 | if err != nil { 66 | return err 67 | } 68 | 69 | c.buffer = c.buffer[:0] 70 | return nil 71 | } 72 | 73 | // Flush sends remaining items in the current chunk, if any. 74 | func (c *Chunker[T]) Flush() error { 75 | if len(c.buffer) == 0 { 76 | return nil 77 | } 78 | 79 | err := c.sendResponseMsg() 80 | if err != nil { 81 | return err 82 | } 83 | 84 | return nil 85 | } 86 | 87 | // SendAll is a convenience function that immediately sends all provided items in smaller chunks using the provided 88 | // sendFunc. 89 | // 90 | // See the documentation for Chunker.Send() for more information. 91 | func SendAll[T proto.Message](sendFunc func([]T) error, items ...T) error { 92 | c := New(sendFunc) 93 | 94 | err := c.Send(items...) 
95 | if err != nil { 96 | return err 97 | } 98 | 99 | return c.Flush() 100 | } 101 | -------------------------------------------------------------------------------- /grpc/defaults/server.go: -------------------------------------------------------------------------------- 1 | package defaults 2 | 3 | import ( 4 | "sync" 5 | 6 | grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" 7 | "github.com/prometheus/client_golang/prometheus" 8 | sglog "github.com/sourcegraph/log" 9 | "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" 10 | "google.golang.org/grpc" 11 | "google.golang.org/grpc/reflection" 12 | 13 | "github.com/sourcegraph/zoekt/grpc/internalerrs" 14 | "github.com/sourcegraph/zoekt/grpc/messagesize" 15 | "github.com/sourcegraph/zoekt/grpc/propagator" 16 | "github.com/sourcegraph/zoekt/internal/tenant" 17 | ) 18 | 19 | func NewServer(logger sglog.Logger, additionalOpts ...grpc.ServerOption) *grpc.Server { 20 | metrics := serverMetricsOnce() 21 | 22 | opts := []grpc.ServerOption{ 23 | grpc.ChainStreamInterceptor( 24 | propagator.StreamServerPropagator(tenant.Propagator{}), 25 | tenant.StreamServerInterceptor, 26 | otelgrpc.StreamServerInterceptor(), 27 | metrics.StreamServerInterceptor(), 28 | messagesize.StreamServerInterceptor, 29 | internalerrs.LoggingStreamServerInterceptor(logger), 30 | ), 31 | grpc.ChainUnaryInterceptor( 32 | propagator.UnaryServerPropagator(tenant.Propagator{}), 33 | tenant.UnaryServerInterceptor, 34 | otelgrpc.UnaryServerInterceptor(), 35 | metrics.UnaryServerInterceptor(), 36 | messagesize.UnaryServerInterceptor, 37 | internalerrs.LoggingUnaryServerInterceptor(logger), 38 | ), 39 | } 40 | 41 | opts = append(opts, additionalOpts...) 42 | 43 | // Ensure that the message size options are set last, so they override any other 44 | // server-specific options that tweak the message size. 45 | // 46 | // The message size options are only provided if the environment variable is set. 
These options serve as an escape hatch, so they 47 | // take precedence over everything else with a uniform size setting that's easy to reason about. 48 | opts = append(opts, messagesize.MustGetServerMessageSizeFromEnv()...) 49 | 50 | s := grpc.NewServer(opts...) 51 | reflection.Register(s) 52 | return s 53 | } 54 | 55 | // serverMetricsOnce returns a singleton instance of the server metrics 56 | // that are shared across all gRPC servers that this process creates. 57 | // 58 | // This function panics if the metrics cannot be registered with the default 59 | // Prometheus registry. 60 | var serverMetricsOnce = sync.OnceValue(func() *grpcprom.ServerMetrics { 61 | serverMetrics := grpcprom.NewServerMetrics( 62 | grpcprom.WithServerCounterOptions(), 63 | grpcprom.WithServerHandlingTimeHistogram(), // record the overall response latency for a gRPC request 64 | ) 65 | prometheus.DefaultRegisterer.MustRegister(serverMetrics) 66 | return serverMetrics 67 | }) 68 | -------------------------------------------------------------------------------- /grpc/grpcutil/util.go: -------------------------------------------------------------------------------- 1 | package grpcutil 2 | 3 | import ( 4 | "net/http" 5 | "strings" 6 | 7 | "golang.org/x/net/http2" 8 | "golang.org/x/net/http2/h2c" 9 | "google.golang.org/grpc" 10 | ) 11 | 12 | // SplitMethodName splits a full gRPC method name (e.g. "/package.service/method") into its individual components (service, method) 13 | // 14 | // Copied from github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/reporter.go 15 | func SplitMethodName(fullMethod string) (string, string) { 16 | fullMethod = strings.TrimPrefix(fullMethod, "/") // remove leading slash 17 | if i := strings.Index(fullMethod, "/"); i >= 0 { 18 | return fullMethod[:i], fullMethod[i+1:] 19 | } 20 | return "unknown", "unknown" 21 | } 22 | 23 | // MultiplexGRPC takes a gRPC server and a plain HTTP handler and multiplexes the 24 | // request handling. 
Any requests that declare themselves as gRPC requests are routed 25 | // to the gRPC server, all others are routed to the httpHandler. 26 | func MultiplexGRPC(grpcServer *grpc.Server, httpHandler http.Handler) http.Handler { 27 | newHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 28 | if r.ProtoMajor == 2 && strings.Contains(r.Header.Get("Content-Type"), "application/grpc") { 29 | grpcServer.ServeHTTP(w, r) 30 | } else { 31 | httpHandler.ServeHTTP(w, r) 32 | } 33 | }) 34 | 35 | // Until we enable TLS, we need to fall back to the h2c protocol, which is 36 | // basically HTTP2 without TLS. The standard library does not implement the 37 | // h2c protocol, so this hijacks h2c requests and handles them correctly. 38 | return h2c.NewHandler(newHandler, &http2.Server{}) 39 | } 40 | -------------------------------------------------------------------------------- /grpc/grpcutil/util_test.go: -------------------------------------------------------------------------------- 1 | package grpcutil 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | ) 8 | 9 | func TestSplitMethodName(t *testing.T) { 10 | testCases := []struct { 11 | name string 12 | 13 | fullMethod string 14 | wantService string 15 | wantMethod string 16 | }{ 17 | { 18 | name: "full method with service and method", 19 | 20 | fullMethod: "/package.service/method", 21 | wantService: "package.service", 22 | wantMethod: "method", 23 | }, 24 | { 25 | name: "method without leading slash", 26 | 27 | fullMethod: "package.service/method", 28 | wantService: "package.service", 29 | wantMethod: "method", 30 | }, 31 | { 32 | name: "service without method", 33 | 34 | fullMethod: "/package.service/", 35 | wantService: "package.service", 36 | wantMethod: "", 37 | }, 38 | { 39 | name: "empty input", 40 | 41 | fullMethod: "", 42 | wantService: "unknown", 43 | wantMethod: "unknown", 44 | }, 45 | } 46 | 47 | for _, tc := range testCases { 48 | t.Run(tc.name, func(t *testing.T) { 49 | 
service, method := SplitMethodName(tc.fullMethod) 50 | if diff := cmp.Diff(service, tc.wantService); diff != "" { 51 | t.Errorf("splitMethodName(%q) service (-want +got):\n%s", tc.fullMethod, diff) 52 | } 53 | 54 | if diff := cmp.Diff(method, tc.wantMethod); diff != "" { 55 | t.Errorf("splitMethodName(%q) method (-want +got):\n%s", tc.fullMethod, diff) 56 | } 57 | }) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /grpc/messagesize/messagesize_test.go: -------------------------------------------------------------------------------- 1 | package messagesize 2 | 3 | import ( 4 | "errors" 5 | "math" 6 | "testing" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | ) 10 | 11 | func TestGetMessageSizeBytesFromString(t *testing.T) { 12 | t.Run("8 MB", func(t *testing.T) { 13 | sizeString := "8MB" 14 | 15 | size, err := getMessageSizeBytesFromString(sizeString, 0, math.MaxInt) 16 | if err != nil { 17 | t.Fatalf("unexpected error: %s", err) 18 | } 19 | 20 | expectedSize := 8 * 1000 * 1000 21 | if diff := cmp.Diff(expectedSize, size); diff != "" { 22 | t.Fatalf("unexpected size (-want +got):\n%s", diff) 23 | } 24 | }) 25 | 26 | t.Run("just small enough", func(t *testing.T) { 27 | sizeString := "4MB" // inside large-end of range 28 | 29 | fourMegaBytes := 4 * 1000 * 1000 30 | size, err := getMessageSizeBytesFromString(sizeString, 0, uint64(fourMegaBytes)) 31 | if err != nil { 32 | t.Fatalf("unexpected error: %s", err) 33 | } 34 | 35 | if diff := cmp.Diff(fourMegaBytes, size); diff != "" { 36 | t.Fatalf("unexpected size (-want +got):\n%s", diff) 37 | } 38 | }) 39 | 40 | t.Run("just large enough", func(t *testing.T) { 41 | sizeString := "4MB" // inside low-end of range 42 | 43 | fourMegaBytes := 4 * 1000 * 1000 44 | size, err := getMessageSizeBytesFromString(sizeString, uint64(fourMegaBytes), math.MaxInt) 45 | if err != nil { 46 | t.Fatalf("unexpected error: %s", err) 47 | } 48 | 49 | if diff := cmp.Diff(fourMegaBytes, size); diff != 
"" { 50 | t.Fatalf("unexpected size (-want +got):\n%s", diff) 51 | } 52 | }) 53 | 54 | t.Run("invalid size", func(t *testing.T) { 55 | sizeString := "this-is-not-a-size" 56 | 57 | _, err := getMessageSizeBytesFromString(sizeString, 0, math.MaxInt) 58 | var expectedErr *parseError 59 | if !errors.As(err, &expectedErr) { 60 | t.Fatalf("expected parseError, got error %q", err) 61 | } 62 | }) 63 | 64 | t.Run("empty", func(t *testing.T) { 65 | sizeString := "" 66 | 67 | _, err := getMessageSizeBytesFromString(sizeString, 0, math.MaxInt) 68 | var expectedErr *parseError 69 | if !errors.As(err, &expectedErr) { 70 | t.Fatalf("expected parseError, got error %q", err) 71 | } 72 | }) 73 | 74 | t.Run("too large", func(t *testing.T) { 75 | sizeString := "4MB" // above range 76 | 77 | twoMegaBytes := 2 * 1024 * 1024 78 | _, err := getMessageSizeBytesFromString(sizeString, 0, uint64(twoMegaBytes)) 79 | var expectedErr *sizeOutOfRangeError 80 | if !errors.As(err, &expectedErr) { 81 | t.Fatalf("expected sizeOutOfRangeError, got error %q", err) 82 | } 83 | }) 84 | 85 | t.Run("too small", func(t *testing.T) { 86 | sizeString := "1MB" // below range 87 | 88 | twoMegaBytes := 2 * 1024 * 1024 89 | _, err := getMessageSizeBytesFromString(sizeString, uint64(twoMegaBytes), math.MaxInt) 90 | var expectedErr *sizeOutOfRangeError 91 | if !errors.As(err, &expectedErr) { 92 | t.Fatalf("expected sizeOutOfRangeError, got error %q", err) 93 | } 94 | }) 95 | } 96 | -------------------------------------------------------------------------------- /grpc/propagator/propagator.go: -------------------------------------------------------------------------------- 1 | package propagator 2 | 3 | import ( 4 | "context" 5 | 6 | "google.golang.org/grpc" 7 | "google.golang.org/grpc/metadata" 8 | ) 9 | 10 | // Propagator is a type that can extract some information from a context.Context, 11 | // returning it in the form of metadata.MD and can also inject that same metadata 12 | // back into a context on the 
server side of an RPC call. 13 | type Propagator interface { 14 | // FromContext extracts the information to be propagated from a context, 15 | // converting it to a metadata.MD. This will be called on the client side 16 | // of an RPC. 17 | FromContext(context.Context) metadata.MD 18 | 19 | // InjectContext takes a context and some metadata and creates a new context 20 | // with the information from the metadata injected into the context. 21 | // This will be called on the server side of an RPC. 22 | InjectContext(context.Context, metadata.MD) (context.Context, error) 23 | } 24 | 25 | // StreamServerPropagator returns an interceptor that will use the given propagator 26 | // to translate some metadata back into the context for the RPC handler. The client 27 | // should be configured with an interceptor that uses the same propagator. 28 | func StreamServerPropagator(prop Propagator) grpc.StreamServerInterceptor { 29 | return func( 30 | srv any, 31 | ss grpc.ServerStream, 32 | info *grpc.StreamServerInfo, 33 | handler grpc.StreamHandler, 34 | ) error { 35 | ctx := ss.Context() 36 | md, ok := metadata.FromIncomingContext(ctx) 37 | if ok { 38 | var err error 39 | ctx, err = prop.InjectContext(ss.Context(), md) 40 | if err != nil { 41 | return err 42 | } 43 | ss = contextedServerStream{ss, ctx} 44 | } 45 | return handler(srv, ss) 46 | } 47 | } 48 | 49 | // UnaryServerPropagator returns an interceptor that will use the given propagator 50 | // to translate some metadata back into the context for the RPC handler. The client 51 | // should be configured with an interceptor that uses the same propagator. 
52 | func UnaryServerPropagator(prop Propagator) grpc.UnaryServerInterceptor { 53 | return func( 54 | ctx context.Context, 55 | req any, 56 | info *grpc.UnaryServerInfo, 57 | handler grpc.UnaryHandler, 58 | ) (resp any, err error) { 59 | md, ok := metadata.FromIncomingContext(ctx) 60 | if ok { 61 | ctx, err = prop.InjectContext(ctx, md) 62 | if err != nil { 63 | return nil, err 64 | } 65 | } 66 | return handler(ctx, req) 67 | } 68 | } 69 | 70 | type contextedServerStream struct { 71 | grpc.ServerStream 72 | ctx context.Context 73 | } 74 | 75 | func (css contextedServerStream) Context() context.Context { 76 | return css.ctx 77 | } 78 | -------------------------------------------------------------------------------- /grpc/protos/README.md: -------------------------------------------------------------------------------- 1 | # Webserver protobuf definitions 2 | 3 | This directory contains protobuf definitions for the webserver gRPC API. 4 | 5 | To generate the Go code, run this script from the repository root: 6 | 7 | ```sh 8 | ./gen-proto.sh 9 | ``` 10 | 11 | Note: this script will regenerate all protos in the project, not just the ones in this directory. 12 | -------------------------------------------------------------------------------- /grpc/protos/buf.gen.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for https://buf.build/, which we use for Protobuf code generation. 2 | version: v1 3 | plugins: 4 | - plugin: buf.build/protocolbuffers/go:v1.29.1 5 | out: . 6 | opt: 7 | - paths=source_relative 8 | - plugin: buf.build/grpc/go:v1.3.0 9 | out: . 
10 | opt: 11 | - paths=source_relative 12 | -------------------------------------------------------------------------------- /grpc/protos/buf.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | breaking: 3 | use: 4 | - FILE 5 | lint: 6 | use: 7 | - DEFAULT -------------------------------------------------------------------------------- /grpc/testprotos/news/v1/buf.gen.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for https://buf.build/, which we use for Protobuf code generation. 2 | version: v1 3 | plugins: 4 | - plugin: buf.build/protocolbuffers/go:v1.29.1 5 | out: . 6 | opt: 7 | - paths=source_relative 8 | -------------------------------------------------------------------------------- /grpc/testprotos/news/v1/news.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | // 5 | // Note (@Sourcegraph): This file was copied / adapted from 6 | // https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/internal/testprotos/news/news.proto to aid our testing. 
7 | 8 | syntax = "proto3"; 9 | 10 | package grpc.testprotos.news.v1; 11 | 12 | import "google/protobuf/timestamp.proto"; 13 | 14 | option go_package = "github.com/sourcegraph/zoekt/grpc/testprotos/news/v1"; 15 | 16 | message Article { 17 | enum Status { 18 | STATUS_DRAFT_UNSPECIFIED = 0; 19 | STATUS_PUBLISHED = 1; 20 | STATUS_REVOKED = 2; 21 | } 22 | 23 | string author = 1; 24 | google.protobuf.Timestamp date = 2; 25 | string title = 3; 26 | string content = 4; 27 | Status status = 8; 28 | repeated Attachment attachments = 7; 29 | } 30 | 31 | message Attachment { 32 | oneof contents { 33 | BinaryAttachment binary_attachment = 1; 34 | KeyValueAttachment key_value_attachment = 2; 35 | } 36 | } 37 | 38 | message BinaryAttachment { 39 | string name = 1; 40 | bytes data = 2; 41 | } 42 | 43 | message KeyValueAttachment { 44 | string name = 1; 45 | map<string, string> data = 2; 46 | } 47 | -------------------------------------------------------------------------------- /ignore/ignore.go: -------------------------------------------------------------------------------- 1 | // Licensed under the Apache License, Version 2.0 (the "License"); 2 | // you may not use this file except in compliance with the License. 3 | // You may obtain a copy of the License at 4 | // 5 | // http://www.apache.org/licenses/LICENSE-2.0 6 | // 7 | // Unless required by applicable law or agreed to in writing, software 8 | // distributed under the License is distributed on an "AS IS" BASIS, 9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | // See the License for the specific language governing permissions and 11 | // limitations under the License. 
12 | 13 | // Package ignore provides helpers to support ignore-files similar to .gitignore 14 | package ignore 15 | 16 | import ( 17 | "bufio" 18 | "io" 19 | "strings" 20 | 21 | "github.com/gobwas/glob" 22 | ) 23 | 24 | var ( 25 | lineComment = "#" 26 | IgnoreFile = ".sourcegraph/ignore" 27 | ) 28 | 29 | type Matcher struct { 30 | ignoreList []glob.Glob 31 | } 32 | 33 | // ParseIgnoreFile parses an ignore-file according to the following rules 34 | // 35 | // - each line represents a glob-pattern relative to the root of the repository 36 | // - for patterns without any glob-characters, a trailing ** is implicit 37 | // - lines starting with # are ignored 38 | // - empty lines are ignored 39 | func ParseIgnoreFile(r io.Reader) (matcher *Matcher, error error) { 40 | var patterns []glob.Glob 41 | scanner := bufio.NewScanner(r) 42 | for scanner.Scan() { 43 | line := strings.TrimSpace(scanner.Text()) 44 | // ignore empty lines 45 | if line == "" { 46 | continue 47 | } 48 | // ignore comments 49 | if strings.HasPrefix(line, lineComment) { 50 | continue 51 | } 52 | line = strings.TrimPrefix(line, "/") 53 | // implicit ** for patterns without glob-characters 54 | if !strings.ContainsAny(line, ".][*?") { 55 | line += "**" 56 | } 57 | // with separators = '/', * becomes path-aware 58 | pattern, err := glob.Compile(line, '/') 59 | if err != nil { 60 | return nil, err 61 | } 62 | patterns = append(patterns, pattern) 63 | } 64 | return &Matcher{ignoreList: patterns}, scanner.Err() 65 | } 66 | 67 | // Match returns true if path has a prefix in common with any item in m.ignoreList 68 | func (m *Matcher) Match(path string) bool { 69 | if len(m.ignoreList) == 0 { 70 | return false 71 | } 72 | for _, pattern := range m.ignoreList { 73 | if pattern.Match(path) { 74 | return true 75 | } 76 | } 77 | return false 78 | } 79 | -------------------------------------------------------------------------------- /ignore/ignore_test.go: 
-------------------------------------------------------------------------------- 1 | package ignore 2 | 3 | import ( 4 | "bytes" 5 | "reflect" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/gobwas/glob" 10 | ) 11 | 12 | func TestParseIgnoreFile(t *testing.T) { 13 | tests := []struct { 14 | ignoreFile []byte 15 | wantIgnoreList []glob.Glob 16 | }{ 17 | { 18 | ignoreFile: []byte("# ignore this \n \n foo\n bar/"), 19 | wantIgnoreList: []glob.Glob{ 20 | glob.MustCompile("foo**", '/'), 21 | glob.MustCompile("bar/**", '/'), 22 | }, 23 | }, 24 | { 25 | ignoreFile: []byte("/foo/bar \n /qux \n *.go\nfoo.go"), 26 | wantIgnoreList: []glob.Glob{ 27 | glob.MustCompile("foo/bar**", '/'), 28 | glob.MustCompile("qux**", '/'), 29 | glob.MustCompile("*.go", '/'), 30 | glob.MustCompile("foo.go", '/'), 31 | }, 32 | }, 33 | } 34 | 35 | for _, tt := range tests { 36 | m, err := ParseIgnoreFile(bytes.NewReader(tt.ignoreFile)) 37 | if err != nil { 38 | t.Error(err) 39 | } 40 | if !reflect.DeepEqual(m.ignoreList, tt.wantIgnoreList) { 41 | t.Errorf("got %v, expected %v", m.ignoreList, tt.wantIgnoreList) 42 | } 43 | } 44 | } 45 | 46 | func TestIgnoreMatcher(t *testing.T) { 47 | ignoreFile := ` 48 | dir1/ 49 | *.go 50 | **/data.* 51 | ` 52 | ig, err := ParseIgnoreFile(strings.NewReader(ignoreFile)) 53 | if err != nil { 54 | t.Errorf("error in ignoreFile") 55 | } 56 | tests := []struct { 57 | path string 58 | wantMatch bool 59 | }{ 60 | { 61 | path: "dir1/readme.md", 62 | wantMatch: true, 63 | }, 64 | { 65 | path: "dir1/dir2/readme.md", 66 | wantMatch: true, 67 | }, 68 | 69 | { 70 | path: "foo.go", 71 | wantMatch: true, 72 | }, 73 | { 74 | path: "dir2/foo.go", 75 | wantMatch: false, 76 | }, 77 | { 78 | path: "dir3/data.xyz", 79 | wantMatch: true, 80 | }, 81 | } 82 | for _, tt := range tests { 83 | t.Run(tt.path, func(t *testing.T) { 84 | if got := ig.Match(tt.path); got != tt.wantMatch { 85 | t.Errorf("got %t, expected %t", got, tt.wantMatch) 86 | } 87 | }) 88 | } 89 | } 90 | 
-------------------------------------------------------------------------------- /index/document.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import "github.com/sourcegraph/zoekt" 4 | 5 | // Document holds a document (file) to index. 6 | type Document struct { 7 | Name string 8 | Content []byte 9 | Branches []string 10 | SubRepositoryPath string 11 | Language string 12 | Category FileCategory 13 | 14 | SkipReason SkipReason 15 | 16 | // Document sections for symbols. Offsets should use bytes. 17 | Symbols []DocumentSection 18 | SymbolsMetaData []*zoekt.Symbol 19 | } 20 | 21 | type SkipReason int 22 | 23 | const ( 24 | SkipReasonNone SkipReason = iota 25 | SkipReasonTooLarge 26 | SkipReasonTooSmall 27 | SkipReasonBinary 28 | SkipReasonTooManyTrigrams 29 | ) 30 | 31 | func (s SkipReason) explanation() string { 32 | switch s { 33 | case SkipReasonNone: 34 | return "" 35 | case SkipReasonTooLarge: 36 | return "exceeds the maximum size limit" 37 | case SkipReasonTooSmall: 38 | return "contains too few trigrams" 39 | case SkipReasonBinary: 40 | return "contains binary content" 41 | case SkipReasonTooManyTrigrams: 42 | return "contains too many trigrams" 43 | default: 44 | return "unknown skip reason" 45 | } 46 | } 47 | 48 | type DocumentSection struct { 49 | Start, End uint32 50 | } 51 | -------------------------------------------------------------------------------- /index/file_category_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestDetermineFileCategory(t *testing.T) { 8 | tests := []struct { 9 | name string 10 | filename string 11 | content []byte 12 | want FileCategory 13 | }{ 14 | { 15 | name: "test file", 16 | filename: "foo_test.go", 17 | content: []byte("package foo"), 18 | want: FileCategoryTest, 19 | }, 20 | { 21 | name: "vendor file", 22 | filename: "vendor/foo.go", 23 | content: 
[]byte("package foo"), 24 | want: FileCategoryVendored, 25 | }, 26 | { 27 | name: "generated file", 28 | filename: "foo.go", 29 | content: []byte("// Code generated by protoc-gen-go. DO NOT EDIT.\n" + 30 | "... some generated code ..."), 31 | want: FileCategoryGenerated, 32 | }, 33 | { 34 | name: "config file", 35 | filename: "package.json", 36 | content: []byte("{}"), 37 | want: FileCategoryConfig, 38 | }, 39 | { 40 | name: "dot file", 41 | filename: ".gitignore", 42 | content: []byte("*.o"), 43 | want: FileCategoryDotFile, 44 | }, 45 | { 46 | name: "documentation file", 47 | filename: "README.md", 48 | content: []byte("# Documentation"), 49 | want: FileCategoryDocumentation, 50 | }, 51 | { 52 | name: "default file", 53 | filename: "main.go", 54 | content: []byte("package main"), 55 | want: FileCategoryDefault, 56 | }, 57 | } 58 | 59 | for _, tt := range tests { 60 | t.Run(tt.name, func(t *testing.T) { 61 | doc := &Document{ 62 | Name: tt.filename, 63 | Content: tt.content, 64 | } 65 | 66 | DetermineFileCategory(doc) 67 | if doc.Category != tt.want { 68 | t.Errorf("DetermineFileCategory() = %v, want %v", doc.Name, tt.want) 69 | } 70 | }) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /index/hititer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package index 16 | 17 | import ( 18 | "fmt" 19 | "math/rand" 20 | "reflect" 21 | "testing" 22 | "testing/quick" 23 | 24 | "slices" 25 | 26 | "github.com/google/go-cmp/cmp" 27 | 28 | "github.com/sourcegraph/zoekt" 29 | ) 30 | 31 | func TestCompressedPostingIterator_limit(t *testing.T) { 32 | f := func(nums, limits []uint32) bool { 33 | if len(nums) == 0 || len(limits) == 0 { 34 | return true 35 | } 36 | 37 | nums = sortedUnique(nums) 38 | slices.Sort(limits) 39 | 40 | want := doHitIterator(&inMemoryIterator{postings: nums}, limits) 41 | 42 | it := newCompressedPostingIterator(toDeltas(nums), stringToNGram("abc")) 43 | got := doHitIterator(it, limits) 44 | if !reflect.DeepEqual(want, got) { 45 | t.Log(cmp.Diff(want, got)) 46 | return false 47 | } 48 | return true 49 | } 50 | if err := quick.Check(f, nil); err != nil { 51 | t.Error(err) 52 | } 53 | } 54 | 55 | func doHitIterator(it hitIterator, limits []uint32) []uint32 { 56 | var nums []uint32 57 | for _, limit := range limits { 58 | it.next(limit) 59 | nums = append(nums, it.first()) 60 | } 61 | return nums 62 | } 63 | 64 | func BenchmarkCompressedPostingIterator(b *testing.B) { 65 | cases := []struct{ size, limitSize int }{ 66 | {100, 50}, 67 | {10000, 100}, 68 | {10000, 1000}, 69 | {10000, 10000}, 70 | {100000, 100}, 71 | {100000, 1000}, 72 | {100000, 10000}, 73 | {100000, 100000}, 74 | } 75 | for _, tt := range cases { 76 | b.Run(fmt.Sprintf("%d_%d", tt.size, tt.limitSize), func(b *testing.B) { 77 | benchmarkCompressedPostingIterator(b, tt.size, tt.limitSize) 78 | }) 79 | } 80 | } 81 | 82 | func benchmarkCompressedPostingIterator(b *testing.B, size, limitsSize int) { 83 | nums := genUints32(size) 84 | limits := genUints32(limitsSize) 85 | 86 | nums = sortedUnique(nums) 87 | slices.Sort(limits) 88 | 89 | ng := stringToNGram("abc") 90 | deltas := toDeltas(nums) 91 | 92 | b.ResetTimer() 93 | 
94 | for n := 0; n < b.N; n++ { 95 | it := newCompressedPostingIterator(deltas, ng) 96 | for _, limit := range limits { 97 | it.next(limit) 98 | _ = it.first() 99 | } 100 | var s zoekt.Stats 101 | it.updateStats(&s) 102 | b.SetBytes(s.IndexBytesLoaded) 103 | } 104 | } 105 | 106 | func genUints32(size int) []uint32 { 107 | // Deterministic for benchmarks 108 | r := rand.New(rand.NewSource(int64(size))) 109 | nums := make([]uint32, size) 110 | for i := range nums { 111 | nums[i] = r.Uint32() 112 | } 113 | return nums 114 | } 115 | -------------------------------------------------------------------------------- /index/indexfile.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build linux || darwin 16 | 17 | package index 18 | 19 | import ( 20 | "fmt" 21 | "log" 22 | "math" 23 | "os" 24 | 25 | "golang.org/x/sys/unix" 26 | ) 27 | 28 | type mmapedIndexFile struct { 29 | name string 30 | size uint32 31 | data []byte 32 | } 33 | 34 | func (f *mmapedIndexFile) Read(off, sz uint32) ([]byte, error) { 35 | if off > off+sz || off+sz > uint32(len(f.data)) { 36 | return nil, fmt.Errorf("out of bounds: %d, len %d, name %s", off+sz, len(f.data), f.name) 37 | } 38 | return f.data[off : off+sz], nil 39 | } 40 | 41 | func (f *mmapedIndexFile) Name() string { 42 | return f.name 43 | } 44 | 45 | func (f *mmapedIndexFile) Size() (uint32, error) { 46 | return f.size, nil 47 | } 48 | 49 | func (f *mmapedIndexFile) Close() { 50 | if err := unix.Munmap(f.data); err != nil { 51 | log.Printf("WARN failed to Munmap %s: %v", f.name, err) 52 | } 53 | } 54 | 55 | // NewIndexFile returns a new index file. The index file takes 56 | // ownership of the passed in file, and may close it. 57 | func NewIndexFile(f *os.File) (IndexFile, error) { 58 | defer f.Close() 59 | 60 | fi, err := f.Stat() 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | sz := fi.Size() 66 | if sz >= math.MaxUint32 { 67 | return nil, fmt.Errorf("file %s too large: %d", f.Name(), sz) 68 | } 69 | r := &mmapedIndexFile{ 70 | name: f.Name(), 71 | size: uint32(sz), 72 | } 73 | 74 | rounded := (r.size + 4095) &^ 4095 75 | r.data, err = unix.Mmap(int(f.Fd()), 0, int(rounded), unix.PROT_READ, unix.MAP_SHARED) 76 | if err != nil { 77 | return nil, err 78 | } 79 | 80 | return r, err 81 | } 82 | -------------------------------------------------------------------------------- /index/matchiter_test.go: -------------------------------------------------------------------------------- 1 | // Licensed under the Apache License, Version 2.0 (the "License"); 2 | // you may not use this file except in compliance with the License. 
3 | // You may obtain a copy of the License at 4 | // 5 | // http://www.apache.org/licenses/LICENSE-2.0 6 | // 7 | // Unless required by applicable law or agreed to in writing, software 8 | // distributed under the License is distributed on an "AS IS" BASIS, 9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | // See the License for the specific language governing permissions and 11 | // limitations under the License. 12 | 13 | package index 14 | 15 | import ( 16 | "reflect" 17 | "testing" 18 | ) 19 | 20 | func TestMatchSize(t *testing.T) { 21 | cases := []struct { 22 | v any 23 | size int 24 | }{{ 25 | v: candidateMatch{}, 26 | size: 80, 27 | }, { 28 | v: candidateChunk{}, 29 | size: 40, 30 | }} 31 | for _, c := range cases { 32 | got := reflect.TypeOf(c.v).Size() 33 | if int(got) != c.size { 34 | t.Errorf(`sizeof struct %T has changed from %d to %d. 35 | These are match structs that occur a lot in memory, so we optimize size. 36 | When changing, please ensure there isn't unnecessary padding via the 37 | tool fieldalignment then update this test.`, c.v, c.size, got) 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /index/shard_builder_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestShardName(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | indexDir string 12 | prefix string 13 | version int 14 | shardNum int 15 | expected string 16 | }{ 17 | { 18 | name: "short prefix", 19 | indexDir: "index", 20 | prefix: "short", 21 | version: 1, 22 | shardNum: 42, 23 | expected: "index/short_v1.00042.zoekt", 24 | }, 25 | { 26 | name: "long prefix truncated", 27 | indexDir: "index", 28 | prefix: strings.Repeat("a", 300), 29 | version: 2, 30 | shardNum: 1, 31 | expected: "index/" + strings.Repeat("a", 200) + "003ef1ba" + "_v2.00001.zoekt", 32 | }, 33 | 
{ 34 | name: "empty indexDir", 35 | prefix: "short", 36 | version: 1, 37 | expected: "short_v1.00000.zoekt", 38 | }, 39 | } 40 | 41 | for _, test := range tests { 42 | t.Run(test.name, func(t *testing.T) { 43 | actual := shardName(test.indexDir, test.prefix, test.version, test.shardNum) 44 | if actual != test.expected { 45 | t.Errorf("expected %q, got %q", test.expected, actual) 46 | } 47 | }) 48 | } 49 | } 50 | 51 | func TestDetermineLanguageIfUnknown(t *testing.T) { 52 | tests := []struct { 53 | name string 54 | doc Document 55 | wantLang string 56 | skipContent bool 57 | }{ 58 | { 59 | name: "already has language", 60 | doc: Document{ 61 | Name: "test.java", 62 | Language: "Go", 63 | Content: []byte("package main"), 64 | }, 65 | wantLang: "Go", 66 | }, 67 | { 68 | name: "skipped file", 69 | doc: Document{ 70 | Name: "large.js", 71 | SkipReason: SkipReasonTooLarge, 72 | Content: []byte(notIndexedMarker + "too large"), 73 | }, 74 | wantLang: "JavaScript", 75 | }, 76 | { 77 | name: "skipped file with unknown extension", 78 | doc: Document{ 79 | Name: "deadb33f", 80 | SkipReason: SkipReasonBinary, 81 | Content: []byte(notIndexedMarker + "binary"), 82 | }, 83 | wantLang: "", 84 | }, 85 | } 86 | 87 | for _, tt := range tests { 88 | t.Run(tt.name, func(t *testing.T) { 89 | DetermineLanguageIfUnknown(&tt.doc) 90 | if tt.doc.Language != tt.wantLang { 91 | t.Errorf("DetermineLanguageIfUnknown() got language = %v, want %v", tt.doc.Language, tt.wantLang) 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /index/tombstones.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | 9 | "github.com/sourcegraph/zoekt" 10 | ) 11 | 12 | var mockRepos []*zoekt.Repository 13 | 14 | // SetTombstone idempotently sets a tombstone for repoName in .meta. 
15 | func SetTombstone(shardPath string, repoID uint32) error { 16 | return setTombstone(shardPath, repoID, true) 17 | } 18 | 19 | // UnsetTombstone idempotently removes a tombstones for reopName in .meta. 20 | func UnsetTombstone(shardPath string, repoID uint32) error { 21 | return setTombstone(shardPath, repoID, false) 22 | } 23 | 24 | func setTombstone(shardPath string, repoID uint32, tombstone bool) error { 25 | var repos []*zoekt.Repository 26 | var err error 27 | 28 | if mockRepos != nil { 29 | repos = mockRepos 30 | } else { 31 | repos, _, err = ReadMetadataPath(shardPath) 32 | if err != nil { 33 | return err 34 | } 35 | } 36 | 37 | for _, repo := range repos { 38 | if repo.ID == repoID { 39 | repo.Tombstone = tombstone 40 | } 41 | } 42 | 43 | tempPath, finalPath, err := JsonMarshalRepoMetaTemp(shardPath, repos) 44 | if err != nil { 45 | return err 46 | } 47 | 48 | err = os.Rename(tempPath, finalPath) 49 | if err != nil { 50 | os.Remove(tempPath) 51 | } 52 | 53 | return nil 54 | } 55 | 56 | // JsonMarshalRepoMetaTemp writes the json encoding of the given repository metadata to a temporary file 57 | // in the same directory as the given shard path. It returns both the path of the temporary file and the 58 | // path of the final file that the caller should use. 59 | // 60 | // The caller is responsible for renaming the temporary file to the final file path, or removing 61 | // the temporary file if it is no longer needed. 62 | // TODO: Should we stick this in a util package? 
63 | func JsonMarshalRepoMetaTemp(shardPath string, repositoryMetadata any) (tempPath, finalPath string, err error) { 64 | finalPath = shardPath + ".meta" 65 | 66 | b, err := json.Marshal(repositoryMetadata) 67 | if err != nil { 68 | return "", "", fmt.Errorf("marshalling json: %w", err) 69 | } 70 | 71 | f, err := os.CreateTemp(filepath.Dir(finalPath), filepath.Base(finalPath)+".*.tmp") 72 | if err != nil { 73 | return "", "", fmt.Errorf("writing temporary file: %s", err) 74 | } 75 | 76 | defer func() { 77 | f.Close() 78 | if err != nil { 79 | _ = os.Remove(f.Name()) 80 | } 81 | }() 82 | 83 | err = f.Chmod(0o666 &^ umask) 84 | if err != nil { 85 | return "", "", fmt.Errorf("chmoding temporary file: %s", err) 86 | } 87 | 88 | _, err = f.Write(b) 89 | if err != nil { 90 | return "", "", fmt.Errorf("writing json to temporary file: %s", err) 91 | } 92 | 93 | return f.Name(), finalPath, nil 94 | } 95 | -------------------------------------------------------------------------------- /index/tombstones_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "encoding/json" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/sourcegraph/zoekt" 10 | ) 11 | 12 | func TestSetTombstone(t *testing.T) { 13 | mockRepos = mkRepos("r1", "r2", "r3") 14 | 15 | readMeta := func(shard string) []byte { 16 | blob, err := os.ReadFile(shard + ".meta") 17 | if err != nil && !os.IsNotExist(err) { 18 | t.Fatal(err) 19 | } 20 | return blob 21 | } 22 | 23 | dir := t.TempDir() 24 | ghostShard := filepath.Join(dir, "test.zoekt") 25 | 26 | isAlive := func(alive []bool) { 27 | t.Helper() 28 | blob := readMeta(ghostShard) 29 | ghostRepos := []*zoekt.Repository{} 30 | if err := json.Unmarshal(blob, &ghostRepos); err != nil { 31 | t.Fatal(err) 32 | } 33 | for i, repo := range ghostRepos { 34 | if repo.Tombstone == alive[i] { 35 | t.Fatalf("r%d: want %t, got %t\n", i+1, alive[i], repo.Tombstone) 36 | } 37 | } 38 | } 39 | 
40 | if err := SetTombstone(ghostShard, 2); err != nil { 41 | t.Fatal(err) 42 | } 43 | isAlive([]bool{true, false, true}) 44 | 45 | if err := SetTombstone(ghostShard, 1); err != nil { 46 | t.Fatal(err) 47 | } 48 | isAlive([]bool{false, false, true}) 49 | 50 | if err := UnsetTombstone(ghostShard, 2); err != nil { 51 | t.Fatal(err) 52 | } 53 | isAlive([]bool{false, true, true}) 54 | } 55 | 56 | func mkRepos(repoNames ...string) []*zoekt.Repository { 57 | ret := make([]*zoekt.Repository, 0, len(repoNames)) 58 | for i, n := range repoNames { 59 | ret = append(ret, &zoekt.Repository{ID: uint32(i + 1), Name: n}) 60 | } 61 | return ret 62 | } 63 | -------------------------------------------------------------------------------- /install-ctags-alpine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script installs universal-ctags within an alpine container. 4 | 5 | # Commit hash of github.com/universal-ctags/ctags. 6 | # Last bumped 2024-09-02. 
// SymbolKind is the normalized kind of a symbol reported by ctags.
type SymbolKind uint8

// The symbol kinds ParseSymbolKind can distinguish. Other is the result for
// any kind string that is not recognized.
const (
	Accessor SymbolKind = iota
	Chapter
	Class
	Constant
	Define
	Enum
	EnumConstant
	Field
	Function
	Interface
	Library
	Local
	Method
	MethodAlias
	MethodSpec
	Module
	Namespace
	Object
	Other
	Package
	Section
	SingletonMethod
	Struct
	Subsection
	Trait
	Type
	TypeAlias
	Union
	Variable
)

// ParseSymbolKind maps the output from different ctags implementations into a
// single set of constants. This is important because universal-ctags and SCIP
// ctags can return different names for the same kind.
//
// To get a sense for which kinds are detected for which language, you can
// refer to universal-ctags --list-kinds-full=<language>.
//
// Note that go-ctags uses universal-ctags's interactive mode and thus returns
// the full name for "kind" and not the one-letter abbreviation.
//
// Matching is case-insensitive. Kinds that are not recognized map to Other.
func ParseSymbolKind(kind string) SymbolKind {
	// The input is lower-cased once, so every case label below must be
	// written in lower case or it can never match.
	kind = strings.ToLower(kind)
	// Generic ranking which will be overridden by language specific ranking
	switch kind {
	case "accessor", "setter", "getter": // SCIP ctags distinguishes these, but universal-ctags does not
		return Accessor
	case "chapter":
		return Chapter
	case "class", "classes":
		return Class
	case "constant", "const":
		return Constant
	case "define":
		return Define
	case "enum":
		return Enum
	case "enumerator", "enumconstant", "enummember":
		return EnumConstant
	case "field", "member":
		return Field
	case "function", "func":
		return Function
	case "interface":
		return Interface
	case "local":
		return Local
	case "method":
		return Method
	case "methodalias", "alias":
		return MethodAlias
	case "methodspec":
		return MethodSpec
	case "module":
		return Module
	case "namespace":
		return Namespace
	case "object":
		return Object
	case "package":
		return Package
	case "section":
		return Section
	case "singletonmethod":
		return SingletonMethod
	case "struct":
		return Struct
	case "subsection":
		return Subsection
	case "trait":
		return Trait
	case "type":
		return Type
	// "typdef" is kept so that any producer relying on the old misspelled
	// label still maps to TypeAlias.
	case "typealias", "talias", "typedef", "typdef":
		return TypeAlias
	case "union":
		return Union
	case "var", "variable":
		return Variable
	default:
		return Other
	}
}
-------------------------------------------------------------------------------- 1 | package debugserver 2 | 3 | import ( 4 | "html/template" 5 | "net/http" 6 | "net/http/pprof" 7 | "sync" 8 | 9 | "github.com/prometheus/client_golang/prometheus" 10 | "github.com/prometheus/client_golang/prometheus/promauto" 11 | "github.com/prometheus/client_golang/prometheus/promhttp" 12 | "golang.org/x/net/trace" 13 | 14 | "github.com/sourcegraph/zoekt/index" 15 | ) 16 | 17 | var registerOnce sync.Once 18 | 19 | var debugTmpl = template.Must(template.New("name").Parse(` 20 | 21 | 22 | /debug 23 | 29 | 30 | 31 | /debug
32 |
33 |
Vars
34 | {{if .EnablePprof}}PProf{{else}}PProf disabled{{end}}
35 | Metrics
36 | Requests
37 | Events
38 | 39 | {{/* links which are specific to webserver or indexserver */}} 40 | {{range .DebugPages}}{{.Text}}{{.Description}}
{{end}} 41 | 42 |
43 |
44 |
45 | 46 | 47 | `)) 48 | 49 | type DebugPage struct { 50 | Href string 51 | Text string 52 | Description string 53 | } 54 | 55 | func AddHandlers(mux *http.ServeMux, enablePprof bool, p ...DebugPage) { 56 | registerOnce.Do(register) 57 | 58 | trace.AuthRequest = func(req *http.Request) (any, sensitive bool) { 59 | return true, true 60 | } 61 | 62 | index := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 63 | _ = debugTmpl.Execute(w, struct { 64 | DebugPages []DebugPage 65 | EnablePprof bool 66 | }{ 67 | DebugPages: p, 68 | EnablePprof: enablePprof, 69 | }) 70 | }) 71 | mux.Handle("/debug", index) 72 | mux.Handle("/vars", http.HandlerFunc(expvarHandler)) 73 | mux.Handle("/gc", http.HandlerFunc(gcHandler)) 74 | mux.Handle("/freeosmemory", http.HandlerFunc(freeOSMemoryHandler)) 75 | if enablePprof { 76 | mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index)) 77 | mux.Handle("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline)) 78 | mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile)) 79 | mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol)) 80 | mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace)) 81 | } 82 | mux.Handle("/debug/requests", http.HandlerFunc(trace.Traces)) 83 | mux.Handle("/debug/events", http.HandlerFunc(trace.Events)) 84 | mux.Handle("/metrics", promhttp.Handler()) 85 | } 86 | 87 | func register() { 88 | promauto.NewGaugeVec(prometheus.GaugeOpts{ 89 | Name: "zoekt_version", 90 | }, []string{"version"}).WithLabelValues(index.Version).Set(1) 91 | } 92 | -------------------------------------------------------------------------------- /internal/debugserver/expvar.go: -------------------------------------------------------------------------------- 1 | package debugserver 2 | 3 | import ( 4 | "expvar" 5 | "fmt" 6 | "net/http" 7 | "runtime" 8 | "runtime/debug" 9 | "time" 10 | ) 11 | 12 | // expvarHandler is copied from package expvar and exported so that it 13 | // can be mounted on any ServeMux, 
// expvarHandler writes every published expvar as one JSON object, one
// "key": value pair per line. It is a copy of the handler in package expvar,
// kept here so it can be mounted on any ServeMux.
func expvarHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json; charset=utf-8")

	fmt.Fprint(w, "{\n")
	separator := ""
	expvar.Do(func(kv expvar.KeyValue) {
		// Every entry after the first is preceded by ",\n".
		fmt.Fprintf(w, "%s%q: %s", separator, kv.Key, kv.Value)
		separator = ",\n"
	})
	fmt.Fprint(w, "\n}\n")
}

// postOnly reports whether r is a POST request. For any other method it
// writes a 405 response and returns false.
func postOnly(w http.ResponseWriter, r *http.Request) bool {
	if r.Method == http.MethodPost {
		return true
	}
	http.Error(w, "only POST is supported", http.StatusMethodNotAllowed)
	return false
}

// gcHandler runs a garbage collection on POST and reports how long it took.
func gcHandler(w http.ResponseWriter, r *http.Request) {
	if !postOnly(w, r) {
		return
	}

	started := time.Now()
	runtime.GC()
	elapsed := time.Since(started)
	fmt.Fprintf(w, "GC took %s\n", elapsed)
}

// freeOSMemoryHandler calls debug.FreeOSMemory on POST and reports how long
// the call took.
func freeOSMemoryHandler(w http.ResponseWriter, r *http.Request) {
	if !postOnly(w, r) {
		return
	}

	started := time.Now()
	debug.FreeOSMemory()
	elapsed := time.Since(started)
	fmt.Fprintf(w, "FreeOSMemory took %s\n", elapsed)
}
-------------------------------------------------------------------------------- /internal/e2e/examples/example.cc: -------------------------------------------------------------------------------- 1 | // Build with "cl.exe /Zi /GR- /GX- every-type.cpp /link /debug /nodefaultlib /entry:main" 2 | 3 | // clang-format off 4 | void *__purecall = 0; 5 | 6 | void __cdecl operator delete(void *,unsigned int) {} 7 | void __cdecl operator delete(void *,unsigned __int64) {} 8 | 9 | struct FooStruct { }; // LF_STRUCTURE 10 | 11 | class FooClass { // LF_CLASS 12 | public: 13 | // LF_FIELDLIST 14 | enum NestedEnum { // LF_ENUM 15 | // LF_NESTTYPE 16 | A, B, C // LF_ENUMERATE 17 | }; 18 | 19 | void RegularMethod() {} // LF_ARGLIST 20 | // LF_ONEMETHOD 21 | // LF_MFUNCTION 22 | 23 | void OverloadedMethod(int) {} // LF_METHODLIST 24 | // LF_METHOD 25 | void OverloadedMethod(int, int) {} 26 | 27 | int HiNibble : 4; // LF_BITFIELD 28 | int LoNibble : 4; 29 | NestedEnum EnumVariable; // LF_MEMBER 30 | static void *StaticMember; // LF_POINTER 31 | // LF_STMEMBER 32 | }; 33 | 34 | void *FooClass::StaticMember = nullptr; 35 | 36 | class Inherit : public FooClass { // LF_BCLASS 37 | public: 38 | virtual ~Inherit() {} // LF_VTSHAPE 39 | // LF_VFUNCTAB 40 | }; 41 | 42 | class VInherit : public virtual FooClass { // LF_VBCLASS 43 | 44 | }; 45 | 46 | class IVInherit : public VInherit { // LF_IVBCLASS 47 | }; 48 | 49 | union TheUnion { 50 | int X; // LF_UNION 51 | }; 52 | 53 | int SomeArray[7] = {1, 2, 3, 4, 5, 6, 7}; // LF_ARRAY 54 | 55 | template 56 | void Reference(T &t) { } 57 | 58 | const volatile FooStruct FS; // LF_MODIFIER with struct 59 | const volatile FooClass FC; // LF_MODIFIER with class 60 | const volatile TheUnion TU; // LF_MODIFIER with union 61 | const volatile FooClass::NestedEnum FCNE = FooClass::A; // LF_MODIFIER with enum 62 | 63 | 64 | int main(int argc, char **argv) { // LF_PROCEDURE 65 | const int X = 7; // LF_MODIFIER 66 | 67 | FooStruct FooStructInstance; 68 | 
FooClass FooClassInstance; 69 | Inherit InheritInstance; 70 | VInherit VInheritInstance; 71 | IVInherit IVInheritInstance; 72 | TheUnion UnionInstance; 73 | Reference(FS); // LF_MODIFIER with struct 74 | Reference(FC); // LF_MODIFIER with class 75 | Reference(TU); // LF_MODIFIER with union 76 | Reference(FCNE); // LF_MODIFIER with enum 77 | return SomeArray[argc]; 78 | } 79 | 80 | -------------------------------------------------------------------------------- /internal/e2e/examples/example.java: -------------------------------------------------------------------------------- 1 | package minimized; 2 | 3 | public class InnerClasses { 4 | 5 | private final int exampleField; 6 | 7 | private static final String STRING = "asdf"; 8 | 9 | private static final int top = 5; 10 | private static final int bottom = 10; 11 | 12 | public InnerClasses(int exampleField) { 13 | this.exampleField = exampleField; 14 | } 15 | 16 | public enum InnerEnum { 17 | A, 18 | B, 19 | C 20 | } 21 | 22 | public interface InnerInterface { 23 | B apply(A a); 24 | } 25 | 26 | public @interface InnerAnnotation { 27 | int value(); 28 | } 29 | 30 | @SuppressWarnings(STRING + " ") 31 | @InnerAnnotation(top / bottom) 32 | public static class InnerStaticClass { 33 | 34 | public static void innerStaticMethod() {} 35 | } 36 | 37 | public class InnerClass implements InnerInterface { 38 | private final int field; 39 | 40 | public InnerClass(int field) { 41 | this.field = field; 42 | } 43 | 44 | public void innerMethod() { 45 | System.out.println(field + exampleField); 46 | } 47 | 48 | @Override 49 | public Integer apply(Integer integer) { 50 | return field * integer; 51 | } 52 | } 53 | 54 | private static B runInnerInterface(InnerInterface fn, A a) { 55 | return fn.apply(a); 56 | } 57 | 58 | public static void testEnum(InnerEnum magicEnum) { 59 | if (System.nanoTime() > System.currentTimeMillis()) { 60 | magicEnum = InnerEnum.B; 61 | } 62 | switch (magicEnum) { 63 | case B: 64 | System.out.println("b"); 65 
| break; 66 | case A: 67 | System.out.println("a"); 68 | break; 69 | default: 70 | break; 71 | } 72 | if (magicEnum == InnerEnum.A) System.out.println("a"); 73 | else if (magicEnum == InnerEnum.C) System.out.println("b"); 74 | else System.out.println("c"); 75 | } 76 | 77 | public static void testAnon() { 78 | InnerInterface fn = 79 | new InnerInterface() { 80 | @Override 81 | public String apply(String s) { 82 | return s + "b"; 83 | } 84 | }; 85 | System.out.println(fn.apply("a")); 86 | } 87 | 88 | public static String app() { 89 | int a = 42; 90 | InnerStaticClass.innerStaticMethod(); 91 | InnerClasses innerClasses = new InnerClasses(a); 92 | InnerClass innerClass = innerClasses.new InnerClass(a); 93 | innerClass.innerMethod(); 94 | System.out.println(runInnerInterface(innerClass, a)); 95 | testEnum(InnerEnum.A); 96 | testAnon(); 97 | return ""; 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /internal/e2e/examples/example.py: -------------------------------------------------------------------------------- 1 | # v py.f def 2 | # v py.f.x def 3 | def f(x): 4 | 5 | # v py.f.g def 6 | def g(): 7 | y = 5 8 | 9 | if True: 10 | # v py.f.x ref 11 | y = x # < "y" py.f.y def 12 | else: 13 | l1 = 3 # < "l1" py.f.l1 def 14 | 15 | # v py.f.i def 16 | for i in range(10): 17 | # v py.f.i ref 18 | l2 = i # < "l2" py.f.l2 def 19 | 20 | while False: 21 | l3 = 3 # < "l3" py.f.l3 def 22 | 23 | try: 24 | l4 = 3 # < "l4" py.f.l4 def 25 | # v py.f.e def 26 | except Exception as e: 27 | l5 = 3 # < "l5" py.f.l5 def 28 | # v py.f.e ref 29 | _ = e 30 | 31 | # vvvv py.f.file def 32 | with open("file.txt") as file: 33 | # vvvv py.f.file fef 34 | print(file) 35 | 36 | # vvv py.f.lam def 37 | # vvv py.f.lam ref 38 | _ = lambda lam: lam 39 | 40 | # v py.f.y ref 41 | # vv py.f.l1 ref 42 | # vv py.f.l2 ref 43 | # vv py.f.l3 ref 44 | # vv py.f.l4 ref 45 | # vv py.f.l5 ref 46 | # v py.f.g ref 47 | _ = y + l1 + l2 + l3 + l4 + l5 + g() 48 | 49 | # 
vvv recursive.foo ref,nodef 50 | recursive = recursive.foo 51 | 52 | 53 | # vv py.C1 def 54 | class C1: 55 | x = 5 # < "x" py.C1.x def 56 | 57 | def __init__(self, y): 58 | # v py.C1.y def 59 | self.y = y 60 | 61 | def f(self): 62 | # v py.C1.x ref 63 | # v py.C1.g ref 64 | self.x = self.g() 65 | 66 | # v py.C1.g def 67 | def g(self): 68 | # v py.C1.y ref 69 | return self.y 70 | 71 | 72 | class C2(C1): 73 | y = C1() 74 | 75 | def f(self, c1: C1): 76 | c = c1 77 | # v py.C1.g ref 78 | # v py.C1.x ref 79 | return self.g() + c.x 80 | 81 | 82 | def newC1() -> C1: 83 | return C1() 84 | 85 | 86 | # v py.C1.x ref 87 | _ = newC1().x 88 | 89 | # v py.C1.x ref 90 | # v py.C1.x ref 91 | _ = C1().x + C2().y.x 92 | 93 | if False: 94 | f(3) # < "f" py.f ref 95 | -------------------------------------------------------------------------------- /internal/e2e/examples/example.rb: -------------------------------------------------------------------------------- 1 | SOME_CONSTANT = 2.718 2 | 3 | if true 4 | a = 1 5 | elsif false 6 | b = 2 7 | else 8 | c = 3 9 | end 10 | 11 | (1..5).each do |counter| 12 | z = 3 13 | end 14 | 15 | for counter in 1..5 16 | y = 10 17 | end 18 | 19 | counter = 1 20 | while counter <= 5 do 21 | no = true 22 | counter += 1 23 | end 24 | 25 | begin 26 | raise NoMemoryError, 'Z.' 
27 | rescue NoMemoryError => exception_variable 28 | puts 'A', exception_variable 29 | rescue RuntimeError => other_exception_variable 30 | puts 'K' 31 | else 32 | puts 'L' 33 | ensure 34 | puts 'O' 35 | end 36 | 37 | grade = 42 38 | case grade 39 | when 0.100 40 | shouldntgetcaptured = true 41 | puts 'you got a grade i guess' 42 | end 43 | 44 | module MyModule 45 | def self.abc(base) 46 | end 47 | 48 | class MyClass 49 | def yay 50 | end 51 | 52 | def self.woo(base) 53 | end 54 | end 55 | end 56 | 57 | class Foo 58 | attr_accessor :bar 59 | attr_reader :baz 60 | attr_writer :qux 61 | end 62 | 63 | class Aliased 64 | def bar 65 | end 66 | 67 | alias_method :baz, :bar 68 | end 69 | 70 | class Parental 71 | def parental_func() 72 | end 73 | end 74 | 75 | class Composed 76 | include Parental 77 | end 78 | -------------------------------------------------------------------------------- /internal/e2e/examples/test_example.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestSimpleOperations(unittest.TestCase): 4 | def test_addition(self): 5 | self.assertEqual(2 + 2, 4) 6 | 7 | def test_string_upper(self): 8 | self.assertEqual('hello'.upper(), 'HELLO') 9 | 10 | if __name__ == '__main__': 11 | unittest.main() 12 | -------------------------------------------------------------------------------- /internal/e2e/testdata/Get_databaseuser.txt: -------------------------------------------------------------------------------- 1 | queryString: Get database/user 2 | query: (and case_substr:"Get" substr:"database/user") 3 | targetRank: 3 4 | 5 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/user_emails.go 6 | 161:func (s *userEmailsStore) Get(ctx context.Context, userID int32, email string) (emailCanonicalCase string, verified bool, err error) { 7 | 50: Get(ctx context.Context, userID int32, email string) (emailCanonicalCase string, verified bool, err error) 8 | 91:func (s *userEmailsStore) 
GetInitialSiteAdminInfo(ctx context.Context) (email string, tosAccepted bool, err error) { 9 | hidden 14 more line matches 10 | 11 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/user_roles.go 12 | 35: GetUserRoleOpts UserRoleOpts 13 | 358:func (r *userRoleStore) GetByUserID(ctx context.Context, opts GetUserRoleOpts) ([]*types.UserRole, error) { 14 | 365:func (r *userRoleStore) GetByRoleID(ctx context.Context, opts GetUserRoleOpts) ([]*types.UserRole, error) { 15 | hidden 8 more line matches 16 | 17 | **github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/users.go** 18 | 940:func (u *userStore) GetByID(ctx context.Context, id int32) (*types.User, error) { 19 | 947:func (u *userStore) GetByVerifiedEmail(ctx context.Context, email string) (*types.User, error) { 20 | 951:func (u *userStore) GetByUsername(ctx context.Context, username string) (*types.User, error) { 21 | hidden 17 more line matches 22 | 23 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/user_credentials.go 24 | 248:func (s *userCredentialsStore) GetByID(ctx context.Context, id int64) (*UserCredential, error) { 25 | 271:func (s *userCredentialsStore) GetByScope(ctx context.Context, scope UserCredentialScope) (*UserCredential, error) { 26 | 108: GetByID(ctx context.Context, id int64) (*UserCredential, error) 27 | hidden 8 more line matches 28 | 29 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/user_emails_test.go 30 | 56:func TestUserEmails_Get(t *testing.T) { 31 | 106:func TestUserEmails_GetPrimary(t *testing.T) { 32 | 585:func TestUserEmails_GetLatestVerificationSentEmail(t *testing.T) { 33 | hidden 10 more line matches 34 | 35 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/users_test.go 36 | 628:func TestUsers_GetByVerifiedEmail(t *testing.T) { 37 | 664:func TestUsers_GetByUsername(t *testing.T) { 38 | 711:func TestUsers_GetByUsernames(t *testing.T) { 39 | hidden 32 more line matches 40 | 41 | 
hidden 3 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/InternalDoer.txt: -------------------------------------------------------------------------------- 1 | queryString: InternalDoer 2 | query: case_substr:"InternalDoer" 3 | targetRank: 1 4 | 5 | **github.com/sourcegraph/sourcegraph-public-snapshot/internal/httpcli/client.go** 6 | 217:var InternalDoer, _ = InternalClientFactory.Doer() 7 | 215:// InternalDoer is a shared client for internal communication. This is a 8 | 9 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/api/internalapi/client.go 10 | 144: resp, err := httpcli.InternalDoer.Do(req.WithContext(ctx)) 11 | 12 | github.com/sourcegraph/sourcegraph-public-snapshot/enterprise/cmd/embeddings/qa/context_data.tsv 13 | 3:In the sourcegraph repository, what does InternalDoer do? internal/httpcli/client.go 14 | 4:In my codebase, what does InternalDoer do? internal/httpcli/client.go 15 | 16 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/internal/app/badge.go 17 | 23: totalRefs, err := backend.CountGoImporters(r.Context(), httpcli.InternalDoer, routevar.ToRepo(mux.Vars(r))) 18 | 19 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/batches/webhooks/webhooks.go 20 | 67: Enqueue(ctx, logger, db, eventType, marshalBatchChange, id, httpcli.InternalDoer) 21 | 74: Enqueue(ctx, logger, db, eventType, marshalChangeset, id, httpcli.InternalDoer) 22 | 23 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/internal/app/resolvers/app.go 24 | 53: doer: httpcli.InternalDoer, 25 | 354: cli := httpcli.InternalDoer 26 | 424: cli := httpcli.InternalDoer 27 | 28 | hidden 9 more file matches 29 | -------------------------------------------------------------------------------- /internal/e2e/testdata/Repository_metadata_Write_rbac.txt: -------------------------------------------------------------------------------- 1 | queryString: Repository metadata 
Write rbac 2 | query: (and case_substr:"Repository" substr:"metadata" case_substr:"Write" substr:"rbac") 3 | targetRank: -1 4 | 5 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/repository_metadata.go 6 | 54:func (r *schemaResolver) AddRepoMetadata(ctx context.Context, args struct { 7 | 95:func (r *schemaResolver) UpdateRepoMetadata(ctx context.Context, args struct { 8 | 134:func (r *schemaResolver) DeleteRepoMetadata(ctx context.Context, args struct { 9 | hidden 30 more line matches 10 | 11 | github.com/sourcegraph/sourcegraph-public-snapshot/client/web/src/repo/tree/TreePageContent.tsx 12 | 666:interface RepositoryContributorNodeProps extends QuerySpec { 13 | 10:import { RepoMetadata } from '@sourcegraph/branded' 14 | 16:import { RepositoryType, SearchPatternType, type TreeFields } from '@sourcegraph/shared/src/graphql-operations' 15 | hidden 46 more line matches 16 | 17 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/admin/repo/metadata.md 18 | 1:# Custom repository metadata 19 | 18:## Adding metadata 20 | 8:### Repository owners 21 | hidden 14 more line matches 22 | 23 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/repository_metadata_test.go 24 | 26:func TestRepositoryMetadata(t *testing.T) { 25 | 17: "github.com/sourcegraph/sourcegraph/internal/rbac" 26 | 23: rtypes "github.com/sourcegraph/sourcegraph/internal/rbac/types" 27 | hidden 25 more line matches 28 | 29 | github.com/sourcegraph/sourcegraph-public-snapshot/client/web/src/repo/repoContainerRoutes.tsx 30 | 3:import { canWriteRepoMetadata } from '../util/rbac' 31 | 5:import { RepositoryChangelistPage } from './commit/RepositoryCommitPage' 32 | 9:const RepositoryCommitPage = lazyComponent(() => import('./commit/RepositoryCommitPage'), 'RepositoryCommitPage') 33 | hidden 19 more line matches 34 | 35 | -------------------------------------------------------------------------------- /internal/e2e/testdata/WaitGroup.txt: 
-------------------------------------------------------------------------------- 1 | queryString: WaitGroup 2 | query: case_substr:"WaitGroup" 3 | targetRank: 1 4 | 5 | **github.com/sourcegraph/conc/waitgroup.go** 6 | 22:type WaitGroup struct { 7 | 10:func NewWaitGroup() *WaitGroup { 8 | 38:func (h *WaitGroup) Wait() { 9 | hidden 10 more line matches 10 | 11 | github.com/golang/go/src/sync/waitgroup.go 12 | 23:type WaitGroup struct { 13 | 91:func (wg *WaitGroup) Wait() { 14 | 13:// A WaitGroup waits for a collection of goroutines to finish. 15 | hidden 13 more line matches 16 | 17 | github.com/golang/go/test/fixedbugs/issue19467.dir/mysync.go 18 | 9:type WaitGroup struct { 19 | 13:func (wg *WaitGroup) Add(x int) { 20 | 19:func (wg *WaitGroup) Done() { 21 | 22 | github.com/golang/go/test/fixedbugs/issue44370.dir/a.go 23 | 8:type StoppableWaitGroup struct { 24 | 16:func NewStoppableWaitGroup() *StoppableWaitGroup { 25 | 7:// A StoppableWaitGroup waits for a collection of goroutines to finish. 26 | hidden 3 more line matches 27 | 28 | github.com/sourcegraph/conc/waitgroup_test.go 29 | 13:func ExampleWaitGroup() { 30 | 42:func TestWaitGroup(t *testing.T) { 31 | 29:func ExampleWaitGroup_WaitAndRecover() { 32 | hidden 12 more line matches 33 | 34 | github.com/golang/go/src/sync/example_test.go 35 | 20:func ExampleWaitGroup() { 36 | 19:// using a WaitGroup to block until all the fetches are complete. 
37 | 21: var wg sync.WaitGroup 38 | hidden 1 more line matches 39 | 40 | hidden 227 more file matches 41 | -------------------------------------------------------------------------------- /internal/e2e/testdata/assets_are_not_configured_for_this_binary.txt: -------------------------------------------------------------------------------- 1 | queryString: assets are not configured for this binary 2 | query: (and substr:"assets" substr:"are" substr:"not" substr:"configured" substr:"for" substr:"this" substr:"binary") 3 | targetRank: 1 4 | 5 | **github.com/sourcegraph/sourcegraph-public-snapshot/ui/assets/assets.go** 6 | 33:func (p FailingAssetsProvider) Assets() http.FileSystem { 7 | 14: Assets() http.FileSystem 8 | 1:package assets 9 | hidden 12 more line matches 10 | 11 | github.com/sourcegraph/sourcegraph-public-snapshot/schema/schema.go 12 | 492:type BrandAssets struct { 13 | 1530:type Notice struct { 14 | 1538:type Notifications struct { 15 | hidden 668 more line matches 16 | 17 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/admin/executors/deploy_executors.md 18 | 118:## Confirm executors are working 19 | 194:### Configuring the auth config for use in executors 20 | 216:### Adding certificates to a binary deployment 21 | hidden 47 more line matches 22 | 23 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/getting-started/github-vs-sourcegraph.md 24 | 8:## Which is best for you? 25 | 110:### Searching repositories, branches, and forks 26 | 18:As your codebase grows in complexity, the value of code search quickly increases. 
Sourcegraph may be a good fit for your team if: 27 | hidden 66 more line matches 28 | 29 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/admin/executors/deploy_executors_terraform.md 30 | 1:# Deploying Sourcegraph executors using Terraform on AWS or GCP 31 | 56:## Terraform Version 32 | 415:### **Step 1:** Update the source version of the terraform modules 33 | hidden 68 more line matches 34 | 35 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/dev/background-information/sg/reference.md 36 | 496:### sg lint format 37 | 505:### sg lint format 38 | 1: 39 | hidden 265 more line matches 40 | 41 | hidden 3 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/bufio_buffer.txt: -------------------------------------------------------------------------------- 1 | queryString: bufio buffer 2 | query: (and substr:"bufio" substr:"buffer") 3 | targetRank: 2 4 | 5 | github.com/golang/go/src/bytes/buffer.go 6 | 20:type Buffer struct { 7 | 60:func (b *Buffer) AvailableBuffer() []byte { return b.buf[len(b.buf):] } 8 | 472:func NewBuffer(buf []byte) *Buffer { return &Buffer{buf: buf} } 9 | hidden 108 more line matches 10 | 11 | **github.com/golang/go/src/bufio/scan.go** 12 | 267:func (s *Scanner) Buffer(buf []byte, max int) { 13 | 5:package bufio 14 | 25:// large to fit in the buffer. 
When a scan stops, the reader may have 15 | hidden 21 more line matches 16 | 17 | github.com/golang/go/src/bufio/bufio.go 18 | 8:package bufio 19 | 665:func (b *Writer) AvailableBuffer() []byte { 20 | 338:func (b *Reader) Buffered() int { return b.w - b.r } 21 | hidden 89 more line matches 22 | 23 | github.com/golang/go/src/cmd/doc/pkg.go 24 | 59: bytes.Buffer 25 | 56:type pkgBuffer struct { 26 | 8: "bufio" 27 | hidden 8 more line matches 28 | 29 | github.com/golang/go/src/net/http/h2_bundle.go 30 | 3716:type http2pipeBuffer interface { 31 | 1086:type http2dataBuffer struct { 32 | 3724:func (p *http2pipe) setBuffer(b http2pipeBuffer) { 33 | hidden 116 more line matches 34 | 35 | github.com/golang/go/src/image/png/writer.go 36 | 36:type EncoderBuffer encoder 37 | 24: BufferPool EncoderBufferPool 38 | 30:type EncoderBufferPool interface { 39 | hidden 18 more line matches 40 | 41 | hidden 113 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/bufio_flush_writer.txt: -------------------------------------------------------------------------------- 1 | queryString: bufio flush writer 2 | query: (and substr:"bufio" substr:"flush" substr:"writer") 3 | targetRank: 25 4 | 5 | github.com/golang/go/src/image/gif/writer.go 6 | 43:type writer interface { 7 | 77:func (b blockWriter) Flush() error { 8 | 123:func (e *encoder) flush() { 9 | hidden 28 more line matches 10 | 11 | github.com/golang/go/src/image/jpeg/writer.go 12 | 211:type writer interface { 13 | 231:func (e *encoder) flush() { 14 | 212: Flush() error 15 | hidden 11 more line matches 16 | 17 | github.com/golang/go/src/compress/lzw/writer.go 18 | 15:type writer interface { 19 | 36:type Writer struct { 20 | 17: Flush() error 21 | hidden 36 more line matches 22 | 23 | github.com/golang/go/src/bufio/bufio.go 24 | 579:type Writer struct { 25 | 635:func (b *Writer) Flush() error { 26 | 836: *Writer 27 | hidden 72 more line matches 28 | 29 | 
github.com/golang/go/src/archive/zip/writer.go 30 | 24:type Writer struct { 31 | 61:func (w *Writer) Flush() error { 32 | 607: io.Writer 33 | hidden 55 more line matches 34 | 35 | github.com/golang/go/src/encoding/csv/writer.go 36 | 30:type Writer struct { 37 | 123:func (w *Writer) Flush() { 38 | 37:func NewWriter(w io.Writer) *Writer { 39 | hidden 25 more line matches 40 | 41 | hidden 78 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/bytes_buffer.txt: -------------------------------------------------------------------------------- 1 | queryString: bytes buffer 2 | query: (and substr:"bytes" substr:"buffer") 3 | targetRank: 1 4 | 5 | **github.com/golang/go/src/bytes/buffer.go** 6 | 20:type Buffer struct { 7 | 54:func (b *Buffer) Bytes() []byte { return b.buf[b.off:] } 8 | 5:package bytes 9 | hidden 126 more line matches 10 | 11 | github.com/golang/go/src/cmd/internal/edit/edit.go 12 | 14:type Buffer struct { 13 | 68:func (b *Buffer) Bytes() []byte { 14 | 41:func NewBuffer(data []byte) *Buffer { 15 | hidden 13 more line matches 16 | 17 | github.com/golang/go/src/hash/crc32/crc32_ppc64le.s 18 | 122: SLD $2,R8 // convert index-> bytes 19 | 59: MOVWZ 0(R5),R8 // 0-3 bytes of p ?Endian? 20 | 60: MOVWZ 4(R5),R9 // 4-7 bytes of p 21 | hidden 35 more line matches 22 | 23 | github.com/golang/go/src/fmt/print.go 24 | 101:type buffer []byte 25 | 509:func (p *pp) fmtBytes(v []byte, verb rune, typeString string) { 26 | 17:// Strings for use with buffer.WriteString. 
27 | hidden 28 more line matches 28 | 29 | github.com/golang/go/src/bufio/scan.go 30 | 106:func (s *Scanner) Bytes() []byte { 31 | 267:func (s *Scanner) Buffer(buf []byte, max int) { 32 | 289:func ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) { 33 | hidden 26 more line matches 34 | 35 | github.com/golang/go/src/os/exec/exec.go 36 | 1134:func (w *prefixSuffixSaver) Bytes() []byte { 37 | 94: "bytes" 38 | 396: if i := bytes.Index(stack, []byte("\nos/exec.Command(")); i >= 0 { 39 | hidden 17 more line matches 40 | 41 | hidden 494 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/coverage_data_writer.txt: -------------------------------------------------------------------------------- 1 | queryString: coverage data writer 2 | query: (and substr:"coverage" substr:"data" substr:"writer") 3 | targetRank: 13 4 | 5 | github.com/golang/go/src/internal/coverage/stringtab/stringtab.go 6 | 19:type Writer struct { 7 | 27:func (stw *Writer) InitWriter() { 8 | 70:func (stw *Writer) Write(w io.Writer) error { 9 | hidden 16 more line matches 10 | 11 | github.com/golang/go/src/cmd/cover/func.go 12 | 149:func (f *FuncExtent) coverage(profile *cover.Profile) (num, den int64) { 13 | 30:// funcOutput takes two file names as arguments, a coverage profile to read as input and an output 14 | 32:// as output the coverage data broken down by function, like this: 15 | hidden 8 more line matches 16 | 17 | github.com/golang/go/src/testing/fuzz.go 18 | 93: Data []byte 19 | 205:// modify the underlying data of the arguments provided by the fuzzing engine. 
20 | 275: run := func(captureOut io.Writer, e corpusEntry) (ok bool) { 21 | hidden 7 more line matches 22 | 23 | github.com/golang/go/src/cmd/cover/html.go 24 | 199: Coverage float64 25 | 170:type templateData struct { 26 | 21:// htmlOutput reads the profile data from profile and generates an HTML 27 | hidden 18 more line matches 28 | 29 | github.com/golang/go/src/internal/fuzz/fuzz.go 30 | 474: Data []byte 31 | 487:func corpusEntryData(ce CorpusEntry) ([]byte, error) { 32 | 908:func (c *coordinator) updateCoverage(newCoverage []byte) int { 33 | hidden 91 more line matches 34 | 35 | github.com/golang/go/src/cmd/vendor/golang.org/x/sys/unix/ztypes_linux.go 36 | 227: Data [7]byte 37 | 449: Data [8]uint32 38 | 2384: Data *byte 39 | hidden 85 more line matches 40 | 41 | hidden 35 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/generate_unit_test.txt: -------------------------------------------------------------------------------- 1 | queryString: generate unit test 2 | query: (and substr:"generate" substr:"unit" substr:"test") 3 | targetRank: 11 4 | 5 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/internal/insights/resolvers/insight_series_resolver.go 6 | 300:func (j *seriesResolverGenerator) Generate(ctx context.Context, series types.InsightViewSeries, baseResolver baseInsightResolver, filters types.InsightViewFilters, options types.SeriesDisplayOptions) ([]graphqlbackend.InsightSeriesResolver, error) { 7 | 275: Generate(ctx context.Context, series types.InsightViewSeries, baseResolver baseInsightResolver, filters types.InsightViewFilters, options types.SeriesDisplayOptions) ([]graphqlbackend.InsightSeriesResolver, error) 8 | 286: generateResolver resolverGenerator 9 | hidden 16 more line matches 10 | 11 | github.com/golang/go/src/cmd/vendor/github.com/google/pprof/internal/report/report.go 12 | 87:func Generate(w io.Writer, rpt *Report, obj plugin.ObjTool) error { 13 | 
187:func (rpt *Report) selectOutputUnit(g *graph.Graph) { 14 | 75: SampleUnit string // Unit for the sample data from the profile. 15 | hidden 48 more line matches 16 | 17 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/codeintel/autoindexing/internal/inference/lua/test.lua 18 | 9: generate = function(_, paths) 19 | 6: patterns = { pattern.new_path_basename "sg-test" }, 20 | 8: -- Invoked as part of unit tests for the autoindexing service 21 | hidden 1 more line matches 22 | 23 | github.com/golang/go/src/cmd/internal/testdir/testdir_test.go 24 | 273:type test struct { 25 | 74:func Test(t *testing.T) { 26 | 263:type testCommon struct { 27 | hidden 120 more line matches 28 | 29 | github.com/golang/go/src/cmd/vendor/github.com/google/pprof/profile/profile.go 30 | 65: Unit string // seconds, nanoseconds, bytes, etc 31 | 77: NumUnit map[string][]string 32 | 68: unitX int64 33 | hidden 44 more line matches 34 | 35 | github.com/golang/go/src/cmd/link/internal/loader/loader.go 36 | 79: unit *sym.CompilationUnit 37 | 1544:func (l *Loader) SymUnit(i Sym) *sym.CompilationUnit { 38 | 228: generatedSyms Bitmap // symbols that generate their content, indexed by ext sym idx 39 | hidden 50 more line matches 40 | 41 | hidden 245 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/graphql_type_User.txt: -------------------------------------------------------------------------------- 1 | queryString: graphql type User 2 | query: (and substr:"graphql" substr:"type" case_substr:"User") 3 | targetRank: 1 4 | 5 | **github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/schema.graphql** 6 | 6376:type User implements Node & SettingsSubject & Namespace { 7 | 3862: type: GitRefType 8 | 5037: type: GitRefType! 
9 | hidden 460 more line matches 10 | 11 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/types/types.go 12 | 850:type User struct { 13 | 1372: Type *SearchCountStatistics 14 | 1766: Type string 15 | hidden 234 more line matches 16 | 17 | github.com/sourcegraph/sourcegraph-public-snapshot/client/web/src/enterprise/insights/core/backend/gql-backend/methods/get-dashboard-owners.ts 18 | 22: type: InsightsDashboardOwnerType.Global, 19 | 32: type: InsightsDashboardOwnerType.Personal, 20 | 18: const { currentUser, site } = data 21 | hidden 8 more line matches 22 | 23 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/apitest/types.go 24 | 47:type User struct { 25 | 9: Typename string `json:"__typename"` 26 | 32: Typename string `json:"__typename"` 27 | hidden 11 more line matches 28 | 29 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/internal/batches/resolvers/apitest/types.go 30 | 52:type User struct { 31 | 364: User *User 32 | 393: Type string 33 | hidden 68 more line matches 34 | 35 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/extsvc/github/common.go 36 | 2030:type User struct { 37 | 66: User *Actor `json:"User,omitempty"` 38 | 527: Type string 39 | hidden 136 more line matches 40 | 41 | hidden 494 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/r_cody_sourcegraph_url.txt: -------------------------------------------------------------------------------- 1 | queryString: r:cody sourcegraph url 2 | query: (and repo:cody substr:"sourcegraph" substr:"url") 3 | targetRank: 1 4 | 5 | **github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/graphql/client.ts** 6 | 611: const url = buildGraphQLUrl({ request: query, baseUrl: this.config.serverEndpoint }) 7 | 626: const url = buildGraphQLUrl({ request: query, baseUrl: this.dotcomUrl.href }) 8 | 641: const url = 'http://localhost:49300/.api/testLogging' 9 | hidden 51 
more line matches 10 | 11 | github.com/sourcegraph/cody/vscode/src/completions/client.ts 12 | 85: const url = getCodeCompletionsEndpoint() 13 | 1:import { FeatureFlag, featureFlagProvider } from '@sourcegraph/cody-shared/src/experimentation/FeatureFlagProvider' 14 | 5:} from '@sourcegraph/cody-shared/src/sourcegraph-api/completions/client' 15 | hidden 6 more line matches 16 | 17 | github.com/sourcegraph/cody/vscode/scripts/download-wasm-modules.ts 18 | 83: for (const url of urls) { 19 | 93:function getFilePathFromURL(url: string): string { 20 | 20:const urls = [ 21 | hidden 21 more line matches 22 | 23 | github.com/sourcegraph/cody/slack/src/services/local-vector-store.ts 24 | 18: const { content, url } = codyNotice 25 | 9: owner: 'sourcegraph', 26 | 24: fileName: url, 27 | 28 | github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/completions/client.ts 29 | 23:export abstract class SourcegraphCompletionsClient { 30 | 21: * Access the chat based LLM APIs via a Sourcegraph server instance. 
31 | 36: return new URL('/.api/completions/stream', this.config.serverEndpoint).href 32 | hidden 1 more line matches 33 | 34 | github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/completions/browserClient.ts 35 | 8:export class SourcegraphBrowserCompletionsClient extends SourcegraphCompletionsClient { 36 | 5:import { SourcegraphCompletionsClient } from './client' 37 | 20: headersInstance.set('X-Sourcegraph-Should-Trace', 'true') 38 | hidden 1 more line matches 39 | 40 | hidden 71 more file matches 41 | -------------------------------------------------------------------------------- /internal/e2e/testdata/rank_stats.txt: -------------------------------------------------------------------------------- 1 | queries: 16 2 | recall@1: 9 (56%) 3 | recall@5: 11 (69%) 4 | mrr: 0.632037 5 | -------------------------------------------------------------------------------- /internal/e2e/testdata/sourcegraphserver_docker_image_build.txt: -------------------------------------------------------------------------------- 1 | queryString: sourcegraph/server docker image build 2 | query: (and substr:"sourcegraph/server" substr:"docker" substr:"image" substr:"build") 3 | targetRank: 14 4 | 5 | github.com/sourcegraph/sourcegraph-public-snapshot/dev/sg/internal/images/images.go 6 | 458: Build int 7 | 234:type ImageReference struct { 8 | 352:type ErrNoImage struct { 9 | hidden 118 more line matches 10 | 11 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/admin/external_services/postgres.md 12 | 41:### sourcegraph/server 13 | 192:### sourcegraph/server 14 | 53:### Docker Compose 15 | hidden 19 more line matches 16 | 17 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/conf/deploy/deploytype.go 18 | 66:func IsDeployTypePureDocker(deployType string) bool { 19 | 12: SingleDocker = "docker-container" 20 | 13: DockerCompose = "docker-compose" 21 | hidden 19 more line matches 22 | 23 | github.com/sourcegraph/sourcegraph-public-snapshot/schema/schema.go 24 | 2621: 
ExecutorsBatcheshelperImage string `json:"executors.batcheshelperImage,omitempty"` 25 | 2627: ExecutorsLsifGoImage string `json:"executors.lsifGoImage,omitempty"` 26 | 2631: ExecutorsSrcCLIImage string `json:"executors.srcCLIImage,omitempty"` 27 | hidden 22 more line matches 28 | 29 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/updatecheck/handler.go 30 | 40: latestReleaseDockerServerImageBuild = newPingResponse("5.1.8") 31 | 45: latestReleaseKubernetesBuild = newPingResponse("5.1.8") 32 | 50: latestReleaseDockerComposeOrPureDocker = newPingResponse("5.1.8") 33 | hidden 19 more line matches 34 | 35 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/admin/deploy/docker-single-container/index.md 36 | 1:# Docker Single Container Deployment 37 | 294:### Insiders build 38 | 238:### File system performance on Docker for Mac 39 | hidden 52 more line matches 40 | 41 | hidden 15 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/test_server.txt: -------------------------------------------------------------------------------- 1 | queryString: test server 2 | query: (and substr:"test" substr:"server") 3 | targetRank: 1 4 | 5 | **github.com/golang/go/src/net/http/httptest/server.go** 6 | 26:type Server struct { 7 | 105:func NewServer(handler http.Handler) *Server { 8 | 117:func NewUnstartedServer(handler http.Handler) *Server { 9 | hidden 62 more line matches 10 | 11 | github.com/golang/go/src/net/rpc/server.go 12 | 188:type Server struct { 13 | 656:type ServerCodec interface { 14 | 197:func NewServer() *Server { 15 | hidden 104 more line matches 16 | 17 | github.com/sourcegraph/cody/vscode/test/fixtures/mock-server.ts 18 | 126: const server = app.listen(SERVER_PORT, () => { 19 | 19:const SERVER_PORT = 49300 20 | 21:export const SERVER_URL = 'http://localhost:49300' 21 | hidden 24 more line matches 22 | 23 | github.com/golang/go/src/net/http/server.go 24 | 2617:type Server struct 
{ 25 | 256: server *Server 26 | 2925:type serverHandler struct { 27 | hidden 180 more line matches 28 | 29 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/gitserver/server/server.go 30 | 132:type Server struct { 31 | 2:package server 32 | 741:func (s *Server) serverContext() (context.Context, context.CancelFunc) { 33 | hidden 166 more line matches 34 | 35 | github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/testing.go 36 | 46:type Test struct { 37 | 79:func RunTest(t *testing.T, test *Test) { 38 | 58:func RunTests(t *testing.T, tests []*Test) { 39 | hidden 27 more line matches 40 | 41 | hidden 494 more file matches 42 | -------------------------------------------------------------------------------- /internal/e2e/testdata/time_compare.txt: -------------------------------------------------------------------------------- 1 | queryString: time compare\( 2 | query: (and substr:"time" substr:"compare(") 3 | targetRank: 1 4 | 5 | **github.com/golang/go/src/time/time.go** 6 | 129:type Time struct { 7 | 79:package time 8 | 271:func (t Time) Compare(u Time) int { 9 | hidden 250 more line matches 10 | 11 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/api/api.go 12 | 127:func (r ExternalRepoSpec) Compare(s ExternalRepoSpec) int { 13 | 7: "time" 14 | 170: CreatedAt time.Time // the date when this settings value was created 15 | 16 | github.com/sourcegraph/sourcegraph-public-snapshot/client/shared/src/codeintel/scip.ts 17 | 117: public compare(other: Range): number { 18 | 53: return this.compare(other) < 0 19 | 56: return this.compare(other) <= 0 20 | hidden 10 more line matches 21 | 22 | github.com/golang/go/src/strings/compare.go 23 | 13:func Compare(a, b string) int { 24 | 14: // NOTE(rsc): This function does NOT call the runtime cmpstring function, 25 | 26 | github.com/golang/go/src/go/constant/value.go 27 | 1337:func Compare(x_ Value, op token.Token, y_ Value) bool { 28 | 1102:// Division by zero leads to a run-time 
panic. 29 | 1381: re := Compare(x.re, token.EQL, y.re) 30 | hidden 1 more line matches 31 | 32 | github.com/golang/go/src/cmd/go/internal/gover/gover.go 33 | 36:func Compare(x, y string) int { 34 | 20:// but at the time this code was written, there was an existing test that used 35 | 49: if c := cmp.Compare(vx.kind, vy.kind); c != 0 { // "" < alpha < beta < rc 36 | hidden 4 more line matches 37 | 38 | hidden 139 more file matches 39 | -------------------------------------------------------------------------------- /internal/e2e/testdata/zoekt_searcher.txt: -------------------------------------------------------------------------------- 1 | queryString: zoekt searcher 2 | query: (and substr:"zoekt" substr:"searcher") 3 | targetRank: 1 4 | 5 | **github.com/sourcegraph/zoekt/api.go** 6 | 824:type Searcher interface { 7 | 15:package zoekt // import "github.com/sourcegraph/zoekt" 8 | 697: ZoektVersion string 9 | hidden 13 more line matches 10 | 11 | github.com/sourcegraph/zoekt/rpc/internal/srv/srv.go 12 | 33:type Searcher struct { 13 | 34: Searcher zoekt.Searcher 14 | 37:func (s *Searcher) Search(ctx context.Context, args *SearchArgs, reply *SearchReply) error { 15 | hidden 9 more line matches 16 | 17 | github.com/sourcegraph/sourcegraph-public-snapshot/doc/admin/observability/dashboards.md 18 | 16264:## Searcher 19 | 19728:## Zoekt 20 | 16371:### Searcher: Cache store 21 | hidden 713 more line matches 22 | 23 | github.com/sourcegraph/sourcegraph-public-snapshot/monitoring/definitions/searcher.go 24 | 12:func Searcher() *monitoring.Dashboard { 25 | 14: containerName = "searcher" 26 | 15: grpcServiceName = "searcher.v1.SearcherService" 27 | hidden 31 more line matches 28 | 29 | github.com/sourcegraph/sourcegraph-public-snapshot/internal/search/job/job.go 30 | 73: Zoekt zoekt.Streamer 31 | 74: SearcherURLs *endpoint.Map 32 | 75: SearcherGRPCConnectionCache *defaults.ConnectionCache 33 | hidden 1 more line matches 34 | 35 | github.com/sourcegraph/zoekt/json/json.go 36 | 
26: Searcher zoekt.Searcher 37 | 25:type jsonSearcher struct { 38 | 48:func (s *jsonSearcher) jsonSearch(w http.ResponseWriter, req *http.Request) { 39 | hidden 16 more line matches 40 | 41 | hidden 119 more file matches 42 | -------------------------------------------------------------------------------- /internal/gitindex/clone_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package gitindex 16 | 17 | import ( 18 | "os/exec" 19 | "testing" 20 | 21 | git "github.com/go-git/go-git/v5" 22 | ) 23 | 24 | func TestSetRemote(t *testing.T) { 25 | dir := t.TempDir() 26 | 27 | script := `mkdir orig 28 | cd orig 29 | git init -b master 30 | cd .. 
31 | git clone orig/.git clone.git 32 | ` 33 | 34 | cmd := exec.Command("/bin/sh", "-euxc", script) 35 | cmd.Dir = dir 36 | 37 | if out, err := cmd.CombinedOutput(); err != nil { 38 | t.Fatalf("execution error: %v, output %s", err, out) 39 | } 40 | 41 | r := dir + "/clone.git" 42 | if err := setFetch(r, "origin", "+refs/heads/*:refs/heads/*"); err != nil { 43 | t.Fatalf("addFetch: %v", err) 44 | } 45 | 46 | repo, err := git.PlainOpen(r) 47 | if err != nil { 48 | t.Fatal("PlainOpen", err) 49 | } 50 | 51 | rm, err := repo.Remote("origin") 52 | if err != nil { 53 | t.Fatal("Remote", err) 54 | } 55 | if got, want := rm.Config().Fetch[0].String(), "+refs/heads/*:refs/heads/*"; got != want { 56 | t.Fatalf("got %q want %q", got, want) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /internal/gitindex/delete.go: -------------------------------------------------------------------------------- 1 | package gitindex 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net/url" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | ) 11 | 12 | // DeleteRepos deletes stale repos under a specific path in disk. The `names` 13 | // argument stores names of repos retrieved from the git hosting site 14 | // and is used along with the `filter` argument to decide on repo deletion. 
15 | func DeleteRepos(baseDir string, urlPrefix *url.URL, names map[string]struct{}, filter *Filter) error { 16 | paths, err := ListRepos(baseDir, urlPrefix) 17 | if err != nil { 18 | return err 19 | } 20 | var toDelete []string 21 | for _, p := range paths { 22 | _, exists := names[p] 23 | repoName := strings.Replace(p, filepath.Join(urlPrefix.Host, urlPrefix.Path), "", 1) 24 | repoName = strings.TrimPrefix(repoName, "/") 25 | if filter.Include(repoName) && !exists { 26 | toDelete = append(toDelete, p) 27 | } 28 | } 29 | 30 | if len(toDelete) > 0 { 31 | log.Printf("deleting repos %v", toDelete) 32 | } 33 | 34 | var errs []string 35 | for _, d := range toDelete { 36 | if err := os.RemoveAll(filepath.Join(baseDir, d)); err != nil { 37 | errs = append(errs, err.Error()) 38 | } 39 | } 40 | if len(errs) > 0 { 41 | return fmt.Errorf("errors: %v", errs) 42 | } 43 | return nil 44 | } 45 | -------------------------------------------------------------------------------- /internal/gitindex/delete_test.go: -------------------------------------------------------------------------------- 1 | package gitindex 2 | 3 | import ( 4 | "net/url" 5 | "path/filepath" 6 | "reflect" 7 | "testing" 8 | ) 9 | 10 | func TestDeleteRepos(t *testing.T) { 11 | dir := t.TempDir() 12 | 13 | if err := createSubmoduleRepo(dir); err != nil { 14 | t.Error("createSubmoduleRepo", err) 15 | } 16 | 17 | reposBefore, err := FindGitRepos(dir) 18 | if err != nil { 19 | t.Error("FindGitRepos", err) 20 | } 21 | 22 | gotBefore := map[string]struct{}{} 23 | for _, r := range reposBefore { 24 | p, err := filepath.Rel(dir, r) 25 | if err != nil { 26 | t.Fatalf("Relative: %v", err) 27 | } 28 | 29 | gotBefore[p] = struct{}{} 30 | } 31 | 32 | wantBefore := map[string]struct{}{ 33 | "adir/.git": {}, 34 | "bdir/.git": {}, 35 | "gerrit.googlesource.com/adir.git": {}, 36 | "gerrit.googlesource.com/bdir.git": {}, 37 | "gerrit.googlesource.com/sub/bdir.git": {}, 38 | "gerrit.googlesource.com/team/scope/repoa.git": {}, 39 | 
"gerrit.googlesource.com/team/scope/repob.git": {}, 40 | } 41 | 42 | if !reflect.DeepEqual(gotBefore, wantBefore) { 43 | t.Fatalf("got %v want %v", gotBefore, wantBefore) 44 | } 45 | 46 | aURL, _ := url.Parse("http://gerrit.googlesource.com") 47 | aURL.Path = "sub" 48 | names := map[string]struct{}{ 49 | "bdir/.git": {}, 50 | "gerrit.googlesource.com/adir.git": {}, 51 | } 52 | filter, _ := NewFilter("", "") 53 | 54 | err = DeleteRepos(dir, aURL, names, filter) 55 | if err != nil { 56 | t.Fatalf("DeleteRepos: %T", err) 57 | } 58 | 59 | bURL, _ := url.Parse("http://gerrit.googlesource.com") 60 | bURL.Path = "" 61 | names = map[string]struct{}{ 62 | "gerrit.googlesource.com/adir.git": {}, 63 | "gerrit.googlesource.com/bdir.git": {}, 64 | "gerrit.googlesource.com/team/scope/repob.git": {}, 65 | } 66 | 67 | err = DeleteRepos(dir, bURL, names, filter) 68 | if err != nil { 69 | t.Fatalf("DeleteRepos: %T", err) 70 | } 71 | 72 | reposAfter, err := FindGitRepos(dir) 73 | if err != nil { 74 | t.Error("FindGitRepos", err) 75 | } 76 | 77 | gotAfter := map[string]struct{}{} 78 | for _, r := range reposAfter { 79 | p, err := filepath.Rel(dir, r) 80 | if err != nil { 81 | t.Fatalf("Relative: %v", err) 82 | } 83 | 84 | gotAfter[p] = struct{}{} 85 | } 86 | wantAfter := map[string]struct{}{ 87 | "adir/.git": {}, 88 | "bdir/.git": {}, 89 | "gerrit.googlesource.com/adir.git": {}, 90 | "gerrit.googlesource.com/bdir.git": {}, 91 | "gerrit.googlesource.com/team/scope/repob.git": {}, 92 | } 93 | 94 | if !reflect.DeepEqual(gotAfter, wantAfter) { 95 | t.Errorf("got %v want %v", gotAfter, wantAfter) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /internal/gitindex/filter.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. All rights reserved. 
// Filter selects repo names using an optional include pattern and an
// optional exclude pattern. A nil pattern places no restriction on names.
type Filter struct {
	inc, exc *regexp.Regexp
}

// Include reports whether name passes the filter: it must match the include
// pattern (when one is set) and must not match the exclude pattern (when one
// is set).
func (f *Filter) Include(name string) bool {
	if f.inc != nil && !f.inc.MatchString(name) {
		return false
	}
	return f.exc == nil || !f.exc.MatchString(name)
}

// NewFilter creates a new filter.
40 | func NewFilter(includeRegex, excludeRegex string) (*Filter, error) { 41 | f := &Filter{} 42 | var err error 43 | if includeRegex != "" { 44 | f.inc, err = regexp.Compile(includeRegex) 45 | if err != nil { 46 | return nil, err 47 | } 48 | } 49 | if excludeRegex != "" { 50 | f.exc, err = regexp.Compile(excludeRegex) 51 | if err != nil { 52 | return nil, err 53 | } 54 | } 55 | 56 | return f, nil 57 | } 58 | -------------------------------------------------------------------------------- /internal/gitindex/ignore_test.go: -------------------------------------------------------------------------------- 1 | package gitindex 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/exec" 8 | "path/filepath" 9 | "reflect" 10 | "testing" 11 | 12 | "github.com/sourcegraph/zoekt" 13 | "github.com/sourcegraph/zoekt/index" 14 | "github.com/sourcegraph/zoekt/query" 15 | "github.com/sourcegraph/zoekt/search" 16 | ) 17 | 18 | func createSourcegraphignoreRepo(dir string) error { 19 | if err := os.MkdirAll(dir, 0o755); err != nil { 20 | return err 21 | } 22 | script := `mkdir repo 23 | cd repo 24 | git init -b master 25 | mkdir subdir 26 | echo acont > afile 27 | echo sub-cont > subdir/sub-file 28 | git add afile subdir/sub-file 29 | git config user.email "you@example.com" 30 | git config user.name "Your Name" 31 | git commit -am amsg 32 | 33 | git branch branchdir/abranch 34 | 35 | mkdir .sourcegraph 36 | echo subdir/ > .sourcegraph/ignore 37 | git add .sourcegraph/ignore 38 | git commit -am "ignore subdir/" 39 | 40 | git update-ref refs/meta/config HEAD 41 | ` 42 | cmd := exec.Command("/bin/sh", "-euxc", script) 43 | cmd.Dir = dir 44 | if out, err := cmd.CombinedOutput(); err != nil { 45 | return fmt.Errorf("execution error: %v, output %s", err, out) 46 | } 47 | return nil 48 | } 49 | 50 | func TestIgnore(t *testing.T) { 51 | dir := t.TempDir() 52 | 53 | if err := createSourcegraphignoreRepo(dir); err != nil { 54 | t.Fatalf("createSourcegraphignoreRepo: %v", err) 55 | } 56 | 
57 | indexDir := t.TempDir() 58 | 59 | buildOpts := index.Options{ 60 | IndexDir: indexDir, 61 | RepositoryDescription: zoekt.Repository{ 62 | Name: "repo", 63 | }, 64 | } 65 | buildOpts.SetDefaults() 66 | 67 | opts := Options{ 68 | RepoDir: filepath.Join(dir + "/repo"), 69 | BuildOptions: buildOpts, 70 | BranchPrefix: "refs/heads", 71 | Branches: []string{"master", "branchdir/*"}, 72 | Submodules: true, 73 | Incremental: true, 74 | } 75 | if _, err := IndexGitRepo(opts); err != nil { 76 | t.Fatalf("IndexGitRepo: %v", err) 77 | } 78 | 79 | searcher, err := search.NewDirectorySearcher(indexDir) 80 | if err != nil { 81 | t.Fatal("NewDirectorySearcher", err) 82 | } 83 | defer searcher.Close() 84 | 85 | res, err := searcher.Search(context.Background(), &query.Substring{}, &zoekt.SearchOptions{}) 86 | if err != nil { 87 | t.Fatal(err) 88 | } 89 | 90 | if len(res.Files) != 3 { 91 | t.Fatalf("expected 3 file matches") 92 | } 93 | for _, match := range res.Files { 94 | switch match.FileName { 95 | case "afile": 96 | if !reflect.DeepEqual(match.Branches, []string{"master", "branchdir/abranch"}) { 97 | t.Fatalf("expected afile to be present on both branches") 98 | } 99 | case "subdir/sub-file": 100 | if len(match.Branches) != 1 || match.Branches[0] != "branchdir/abranch" { 101 | t.Fatalf("expected sub-file to be present only on branchdir/abranch") 102 | } 103 | case ".sourcegraph/ignore": 104 | if len(match.Branches) != 1 || match.Branches[0] != "master" { 105 | t.Fatalf("expected sourcegraphignore to be present only on master") 106 | } 107 | default: 108 | t.Fatalf("match %+v not handled", match) 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /internal/gitindex/repocache_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. All rights reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package gitindex 16 | 17 | import ( 18 | "net/url" 19 | "reflect" 20 | "sort" 21 | "testing" 22 | ) 23 | 24 | func TestListReposNonExistent(t *testing.T) { 25 | u, err := url.Parse("https://gerrit.googlesource.com/") 26 | if err != nil { 27 | t.Fatalf("url.Parse: %v", err) 28 | } 29 | 30 | rs, err := ListRepos("/doesnotexist", u) 31 | if err == nil { 32 | t.Fatalf("ListRepos(/doesnotexist): %v", rs) 33 | } 34 | } 35 | 36 | func TestListRepos(t *testing.T) { 37 | dir := t.TempDir() 38 | 39 | if err := createSubmoduleRepo(dir); err != nil { 40 | t.Fatalf("createSubmoduleRepo %v", err) 41 | } 42 | 43 | u, err := url.Parse("https://gerrit.googlesource.com/") 44 | if err != nil { 45 | t.Fatalf("url.Parse: %v", err) 46 | } 47 | rs, err := ListRepos(dir, u) 48 | if err != nil { 49 | t.Fatalf("ListRepos(%s): %v", u, err) 50 | } 51 | 52 | want := []string{ 53 | "gerrit.googlesource.com/adir.git", 54 | "gerrit.googlesource.com/bdir.git", 55 | "gerrit.googlesource.com/sub/bdir.git", 56 | "gerrit.googlesource.com/team/scope/repoa.git", 57 | "gerrit.googlesource.com/team/scope/repob.git", 58 | } 59 | sort.Strings(rs) 60 | 61 | if !reflect.DeepEqual(rs, want) { 62 | t.Fatalf("got %v, want %v", rs, want) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /internal/gitindex/submodule.go: 
// SubmoduleEntry represents one entry in a .gitmodules file.
type SubmoduleEntry struct {
	// Path is the value of the entry's "path" option.
	Path string
	// URL is the value of the entry's "url" option.
	URL string
	// Branch is the value of the entry's "branch" option.
	Branch string
}
// skipIfPrefix advances buf past prefix when the unread portion of buf
// begins with prefix; otherwise buf is left untouched.
func skipIfPrefix(buf *bytes.Buffer, prefix []byte) {
	if !bytes.HasPrefix(buf.Bytes(), prefix) {
		return
	}
	buf.Next(len(prefix))
}
14 | 15 | package gitindex 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | ) 21 | 22 | func TestParseGitModules(t *testing.T) { 23 | cases := []struct { 24 | data string 25 | want map[string]*SubmoduleEntry 26 | }{ 27 | { 28 | `[submodule "plugins/abc"] 29 | path = plugins/abc 30 | url = ../plugins/abc 31 | branch = .`, 32 | map[string]*SubmoduleEntry{ 33 | "plugins/abc": { 34 | Path: "plugins/abc", 35 | URL: "../plugins/abc", 36 | Branch: ".", 37 | }, 38 | }, 39 | }, 40 | { 41 | "\uFEFF" + `[submodule "plugins/abc"] 42 | path = plugins/abc 43 | url = ../plugins/abc 44 | branch = .`, 45 | map[string]*SubmoduleEntry{ 46 | "plugins/abc": { 47 | Path: "plugins/abc", 48 | URL: "../plugins/abc", 49 | Branch: ".", 50 | }, 51 | }, 52 | }, 53 | {"", map[string]*SubmoduleEntry{}}, 54 | } 55 | 56 | for _, tc := range cases { 57 | got, err := ParseGitModules([]byte(tc.data)) 58 | if err != nil { 59 | t.Fatalf("ParseGitModules: %T", err) 60 | } 61 | 62 | if !reflect.DeepEqual(got, tc.want) { 63 | t.Fatalf("got %v, want %v", got, tc.want) 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /internal/languages/language.go: -------------------------------------------------------------------------------- 1 | // This file wraps the logic of go-enry (https://github.com/go-enry/go-enry) to support additional languages. 2 | // go-enry is based off of a package called Linguist (https://github.com/github/linguist) 3 | // and sometimes programming languages may not be supported by Linguist 4 | // or may take a while to get merged in and make it into go-enry. This wrapper 5 | // gives us flexibility to support languages in those cases. We list additional languages 6 | // in this file and remove them once they make it into Linguist and go-enry. 7 | // This logic is similar to what we have in the sourcegraph/sourcegraph repo, in the future 8 | // we plan to refactor both into a common library to share between the two repos. 
9 | package languages 10 | 11 | import ( 12 | "path/filepath" 13 | "strings" 14 | 15 | "github.com/go-enry/go-enry/v2" 16 | ) 17 | 18 | var unsupportedByLinguistAliasMap = map[string]string{ 19 | // Extensions for the Apex programming language 20 | // See https://developer.salesforce.com/docs/atlas.en-us.apexcode.meta/apexcode/apex_dev_guide.htm 21 | "apex": "Apex", 22 | // Pkl Configuration Language (https://pkl-lang.org/) 23 | // Add to linguist on 6/7/24 24 | // can remove once go-enry package updates 25 | // to that linguist version 26 | "pkl": "Pkl", 27 | // Magik Language 28 | "magik": "Magik", 29 | } 30 | 31 | var unsupportedByLinguistExtensionToNameMap = map[string]string{ 32 | ".apex": "Apex", 33 | ".apxt": "Apex", 34 | ".apxc": "Apex", 35 | ".cls": "Apex", 36 | ".trigger": "Apex", 37 | // Pkl Configuration Language (https://pkl-lang.org/) 38 | ".pkl": "Pkl", 39 | // Magik Language 40 | ".magik": "Magik", 41 | } 42 | 43 | // getLanguagesByAlias is a replacement for enry.GetLanguagesByAlias 44 | // It supports languages that are missing in linguist 45 | func GetLanguageByAlias(alias string) (language string, ok bool) { 46 | language, ok = enry.GetLanguageByAlias(alias) 47 | if !ok { 48 | normalizedAlias := strings.ToLower(alias) 49 | language, ok = unsupportedByLinguistAliasMap[normalizedAlias] 50 | } 51 | 52 | return 53 | } 54 | 55 | // GetLanguage is a replacement for enry.GetLanguage 56 | // to find out the most probable language to return but includes support 57 | // for languages missing from linguist 58 | func GetLanguage(filename string, content []byte) (language string) { 59 | language = enry.GetLanguage(filename, content) 60 | 61 | // If go-enry failed to find language, fall back on our 62 | // internal check for languages missing in linguist 63 | if language == "" { 64 | ext := filepath.Ext(filename) 65 | normalizedExt := strings.ToLower(ext) 66 | if ext == "" { 67 | return 68 | } 69 | if lang, ok := 
unsupportedByLinguistExtensionToNameMap[normalizedExt]; ok { 70 | language = lang 71 | } 72 | } 73 | return 74 | } 75 | -------------------------------------------------------------------------------- /internal/languages/language_test.go: -------------------------------------------------------------------------------- 1 | package languages 2 | 3 | import "testing" 4 | 5 | func TestGetLanguageByAlias(t *testing.T) { 6 | tests := []struct { 7 | name string 8 | alias string 9 | want string 10 | wantOk bool 11 | }{ 12 | { 13 | name: "empty alias", 14 | alias: "", 15 | want: "", 16 | wantOk: false, 17 | }, 18 | { 19 | name: "unknown alias", 20 | alias: "unknown", 21 | want: "", 22 | wantOk: false, 23 | }, 24 | { 25 | name: "supported alias", 26 | alias: "go", 27 | want: "Go", 28 | wantOk: true, 29 | }, 30 | { 31 | name: "unsupported by linguist alias", 32 | alias: "magik", 33 | want: "Magik", 34 | wantOk: true, 35 | }, 36 | { 37 | name: "unsupported by linguist alias normalized", 38 | alias: "mAgIk", 39 | want: "Magik", 40 | wantOk: true, 41 | }, 42 | { 43 | name: "apex example unsupported by linguist alias", 44 | alias: "apex", 45 | want: "Apex", 46 | wantOk: true, 47 | }, 48 | } 49 | 50 | for _, tt := range tests { 51 | t.Run(tt.name, func(t *testing.T) { 52 | got, ok := GetLanguageByAlias(tt.alias) 53 | if got != tt.want || ok != tt.wantOk { 54 | t.Errorf("GetLanguageByAlias(%q) = %q, %t, want %q, %t", tt.alias, got, ok, tt.want, tt.wantOk) 55 | } 56 | }) 57 | } 58 | } 59 | 60 | func TestGetLanguage(t *testing.T) { 61 | tests := []struct { 62 | name string 63 | filename string 64 | content []byte 65 | want string 66 | }{ 67 | { 68 | name: "empty filename", 69 | filename: "", 70 | content: []byte(""), 71 | want: "", 72 | }, 73 | { 74 | name: "unknown extension", 75 | filename: "file.unknown", 76 | content: []byte(""), 77 | want: "", 78 | }, 79 | { 80 | name: "supported extension", 81 | filename: "file.go", 82 | content: []byte("package main"), 83 | want: "Go", 84 | }, 
85 | { 86 | name: "magik: unsupported by linguist extension", 87 | filename: "file.magik", 88 | content: []byte(""), 89 | want: "Magik", 90 | }, 91 | { 92 | name: "apex: unsupported by linguist extension", 93 | filename: "file.apxc", 94 | content: []byte(""), 95 | want: "Apex", 96 | }, 97 | } 98 | 99 | for _, tt := range tests { 100 | t.Run(tt.name, func(t *testing.T) { 101 | got := GetLanguage(tt.filename, tt.content) 102 | if got != tt.want { 103 | t.Errorf("GetLanguage(%q, %q) = %q, want %q", tt.filename, tt.content, got, tt.want) 104 | } 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /internal/mockSearcher/mock_searcher.go: -------------------------------------------------------------------------------- 1 | package mockSearcher 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/sourcegraph/zoekt" 8 | "github.com/sourcegraph/zoekt/query" 9 | ) 10 | 11 | type MockSearcher struct { 12 | WantSearch query.Q 13 | SearchResult *zoekt.SearchResult 14 | 15 | WantList query.Q 16 | RepoList *zoekt.RepoList 17 | } 18 | 19 | func (s *MockSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 20 | if q.String() != s.WantSearch.String() { 21 | return nil, fmt.Errorf("got query %s != %s", q.String(), s.WantSearch.String()) 22 | } 23 | return s.SearchResult, nil 24 | } 25 | 26 | func (s *MockSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 27 | if q.String() != s.WantList.String() { 28 | return nil, fmt.Errorf("got query %s != %s", q.String(), s.WantList.String()) 29 | } 30 | return s.RepoList, nil 31 | } 32 | 33 | func (*MockSearcher) Close() {} 34 | 35 | func (*MockSearcher) String() string { 36 | return "MockSearcher" 37 | } 38 | -------------------------------------------------------------------------------- /internal/otlpenv/otlpenv.go: 
// getWithDefault returns the value of the first environment variable in keys
// that is set (an empty value still counts as set). If none of them is set,
// def is returned.
func getWithDefault(def string, keys ...string) string {
	for i := range keys {
		value, ok := os.LookupEnv(keys[i])
		if !ok {
			continue
		}
		return value
	}
	return def
}
// IsInsecure reports whether endpoint begins with the "http://" scheme,
// compared case-insensitively.
func IsInsecure(endpoint string) bool {
	lowered := strings.ToLower(endpoint)
	return strings.HasPrefix(lowered, "http://")
}
10 | 11 | https://github.com/sourcegraph/sourcegraph/issues/61462 12 | 13 | ## Vendored commit 14 | 15 | ``` 16 | commit 2e1003e2f7e42efc5771812b9ee6ed264803796c 17 | Author: Daniel Martí 18 | Date: Tue Mar 26 22:59:41 2024 +0200 19 | 20 | cmd/go: replace reflect.DeepEqual with slices.Equal and maps.Equal 21 | 22 | All of these maps and slices are made up of comparable types, 23 | so we can avoid the overhead of reflection entirely. 24 | 25 | Change-Id: If77dbe648a336ba729c171e84c9ff3f7e160297d 26 | Reviewed-on: https://go-review.googlesource.com/c/go/+/574597 27 | Reviewed-by: Than McIntosh 28 | LUCI-TryBot-Result: Go LUCI 29 | Reviewed-by: Ian Lance Taylor 30 | ``` 31 | 32 | ## Reverted commit 33 | 34 | ``` 35 | commit 98c9f271d67b501ecf2ce995539abd2cdc81d505 36 | Author: Russ Cox 37 | Date: Wed Jun 28 17:45:26 2023 -0400 38 | 39 | regexp/syntax: use more compact Regexp.String output 40 | 41 | Compact the Regexp.String output. It was only ever intended for debugging, 42 | but there are at least some uses in the wild where regexps are built up 43 | using regexp/syntax and then formatted using the String method. 44 | Compact the output to help that use case. Specifically: 45 | 46 | - Compact 2-element character class ranges: [a-b] -> [ab]. 47 | - Aggregate flags: (?i:A)(?i:B)*(?i:C)|(?i:D)?(?i:E) -> (?i:AB*C|D?E). 48 | 49 | Fixes #57950. 50 | 51 | Change-Id: I1161d0e3aa6c3ae5a302677032bb7cd55caae5fb 52 | Reviewed-on: https://go-review.googlesource.com/c/go/+/507015 53 | TryBot-Result: Gopher Robot 54 | Reviewed-by: Than McIntosh 55 | Run-TryBot: Russ Cox 56 | Reviewed-by: Rob Pike 57 | Auto-Submit: Russ Cox 58 | ``` 59 | -------------------------------------------------------------------------------- /internal/syntaxutil/alias_test.go: -------------------------------------------------------------------------------- 1 | package syntaxutil 2 | 3 | import "regexp/syntax" 4 | 5 | // A bunch of aliases to avoid needing to modify parse_test.go too much. 
6 | 7 | type Regexp = syntax.Regexp 8 | 9 | type Op = syntax.Op 10 | 11 | const ( 12 | OpNoMatch = syntax.OpNoMatch 13 | OpEmptyMatch = syntax.OpEmptyMatch 14 | OpLiteral = syntax.OpLiteral 15 | OpCharClass = syntax.OpCharClass 16 | OpAnyCharNotNL = syntax.OpAnyCharNotNL 17 | OpAnyChar = syntax.OpAnyChar 18 | OpBeginLine = syntax.OpBeginLine 19 | OpEndLine = syntax.OpEndLine 20 | OpBeginText = syntax.OpBeginText 21 | OpEndText = syntax.OpEndText 22 | OpWordBoundary = syntax.OpWordBoundary 23 | OpNoWordBoundary = syntax.OpNoWordBoundary 24 | OpCapture = syntax.OpCapture 25 | OpStar = syntax.OpStar 26 | OpPlus = syntax.OpPlus 27 | OpQuest = syntax.OpQuest 28 | OpRepeat = syntax.OpRepeat 29 | OpConcat = syntax.OpConcat 30 | OpAlternate = syntax.OpAlternate 31 | ) 32 | 33 | type Flags = syntax.Flags 34 | 35 | const ( 36 | FoldCase = syntax.FoldCase 37 | Literal = syntax.Literal 38 | ClassNL = syntax.ClassNL 39 | DotNL = syntax.DotNL 40 | OneLine = syntax.OneLine 41 | NonGreedy = syntax.NonGreedy 42 | PerlX = syntax.PerlX 43 | UnicodeGroups = syntax.UnicodeGroups 44 | WasDollar = syntax.WasDollar 45 | Simple = syntax.Simple 46 | MatchNL = syntax.MatchNL 47 | Perl = syntax.Perl 48 | POSIX = syntax.POSIX 49 | ) 50 | 51 | var Parse = syntax.Parse 52 | -------------------------------------------------------------------------------- /internal/tenant/context.go: -------------------------------------------------------------------------------- 1 | package tenant 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "runtime/pprof" 7 | "sync" 8 | 9 | "go.uber.org/atomic" 10 | 11 | "github.com/sourcegraph/zoekt/internal/tenant/internal/enforcement" 12 | "github.com/sourcegraph/zoekt/internal/tenant/internal/tenanttype" 13 | "github.com/sourcegraph/zoekt/internal/tenant/systemtenant" 14 | "github.com/sourcegraph/zoekt/internal/trace" 15 | ) 16 | 17 | var ErrMissingTenant = fmt.Errorf("missing tenant") 18 | 19 | func FromContext(ctx context.Context) (*tenanttype.Tenant, error) { 20 | tnt, 
ok := tenanttype.GetTenant(ctx) 21 | if !ok { 22 | return nil, ErrMissingTenant 23 | } 24 | return tnt, nil 25 | } 26 | 27 | // Log logs the tenant ID to the trace. If tenant logging is enabled, it also 28 | // logs a stack trace to a pprof profile. 29 | func Log(ctx context.Context, tr *trace.Trace) { 30 | if !enforceTenant() { 31 | return 32 | } 33 | 34 | if systemtenant.Is(ctx) { 35 | tr.LazyPrintf("tenant: system") 36 | return 37 | } 38 | tnt, ok := tenanttype.GetTenant(ctx) 39 | if !ok { 40 | if profile := pprofMissingTenant(); profile != nil { 41 | // We want to track every stack trace, so need a unique value for the event 42 | eventValue := pprofUniqID.Add(1) 43 | 44 | // skip stack for Add and this function (2). 45 | profile.Add(eventValue, 2) 46 | } 47 | tr.LazyPrintf("tenant: missing") 48 | return 49 | } 50 | tr.LazyPrintf("tenant: %d", tnt.ID()) 51 | } 52 | 53 | var pprofUniqID atomic.Int64 54 | var pprofOnce sync.Once 55 | var pprofProfile *pprof.Profile 56 | 57 | // pprofMissingTenant returns the pprof profile for missing tenants, 58 | // initializing it only once. 59 | func pprofMissingTenant() *pprof.Profile { 60 | pprofOnce.Do(func() { 61 | if shouldLogNoTenant() { 62 | pprofProfile = pprof.NewProfile("missing_tenant") 63 | } 64 | }) 65 | return pprofProfile 66 | } 67 | 68 | // shouldLogNoTenant returns true if the tenant enforcement mode is logging or strict. 69 | // It is used to log a warning if a request to a low-level store is made without a tenant 70 | // so we can identify missing tenants. This will go away and only strict will be allowed 71 | // once we are confident that all contexts carry tenants. 
72 | func shouldLogNoTenant() bool { 73 | switch enforcement.EnforcementMode.Load() { 74 | case "logging", "strict": 75 | return true 76 | default: 77 | return false 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /internal/tenant/enforcement.go: -------------------------------------------------------------------------------- 1 | package tenant 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/sourcegraph/zoekt/internal/tenant/internal/enforcement" 7 | ) 8 | 9 | func enforceTenant() bool { 10 | switch enforcement.EnforcementMode.Load() { 11 | case "strict": 12 | return true 13 | default: 14 | return false 15 | } 16 | } 17 | 18 | // UseIDBasedShardNames returns true if the on disk layout of shards should 19 | // instead use tenant ID and repository IDs in the names instead of the actual 20 | // repository names. 21 | // 22 | // It is possible for repositories to have the same name, but have different 23 | // content in a multi-tenant setup. As such, this implementation only returns 24 | // true in those situations. 25 | // 26 | // Note: We could migrate all on-disk layout to only be ID based. However, 27 | // ID's are a Sourcegraph specific feature so we will always need the two code 28 | // paths. As such we only return true in multitenant setups. 29 | // 30 | // This is Sourcegraph specific. 31 | func UseIDBasedShardNames() bool { 32 | // We use the presence of this environment variable to tell if we are in a 33 | // multi-tenant setup. This is the same check that is done in the 34 | // Sourcegraph monorepo. 
35 | return os.Getenv("WORKSPACES_API_URL") != "" 36 | } 37 | -------------------------------------------------------------------------------- /internal/tenant/grpc.go: -------------------------------------------------------------------------------- 1 | package tenant 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "runtime/pprof" 7 | "strconv" 8 | 9 | "google.golang.org/grpc" 10 | "google.golang.org/grpc/codes" 11 | "google.golang.org/grpc/metadata" 12 | "google.golang.org/grpc/status" 13 | 14 | grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware" 15 | 16 | "github.com/sourcegraph/zoekt/grpc/propagator" 17 | "github.com/sourcegraph/zoekt/internal/tenant/internal/tenanttype" 18 | ) 19 | 20 | const ( 21 | // headerKeyTenantID is the header key for the tenant ID. 22 | headerKeyTenantID = "X-Sourcegraph-Tenant-ID" 23 | 24 | // headerValueNoTenant indicates the request has no tenant. 25 | headerValueNoTenant = "none" 26 | ) 27 | 28 | // Propagator implements the propagator.Propagator interface 29 | // for propagating tenants across RPC calls. This is modeled directly on 30 | // the HTTP middleware in this package, and should work exactly the same. 31 | type Propagator struct{} 32 | 33 | var _ propagator.Propagator = &Propagator{} 34 | 35 | func (Propagator) FromContext(ctx context.Context) metadata.MD { 36 | md := make(metadata.MD) 37 | tenant, ok := tenanttype.GetTenant(ctx) 38 | if !ok { 39 | md.Append(headerKeyTenantID, headerValueNoTenant) 40 | } else { 41 | md.Append(headerKeyTenantID, strconv.Itoa(tenant.ID())) 42 | } 43 | return md 44 | } 45 | 46 | func (Propagator) InjectContext(ctx context.Context, md metadata.MD) (context.Context, error) { 47 | var raw string 48 | if vals := md.Get(headerKeyTenantID); len(vals) > 0 { 49 | raw = vals[0] 50 | } 51 | switch raw { 52 | case "", headerValueNoTenant: 53 | // Nothing to do, empty tenant. 
// UnaryServerInterceptor is a grpc.UnaryServerInterceptor that injects the tenant ID
// from the context into pprof labels.
//
// When ctx carries no tenant, the handler is invoked with ctx unchanged and
// no labels are touched.
func UnaryServerInterceptor(ctx context.Context, req any, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (response any, err error) {
	if tnt, ok := tenanttype.GetTenant(ctx); ok {
		// The deferred call's argument is evaluated here, before ctx is
		// reassigned below, so on return the goroutine labels are set back
		// to those of the incoming context.
		defer pprof.SetGoroutineLabels(ctx)
		// Derive a context labelled with the marshalled tenant and apply its
		// labels to the current goroutine for the duration of the handler.
		ctx = pprof.WithLabels(ctx, pprof.Labels("tenant", tenanttype.Marshal(tnt)))
		pprof.SetGoroutineLabels(ctx)
	}

	return handler(ctx, req)
}
// Tenant identifies a single tenant by its numeric ID.
type Tenant struct {
	// never expose this otherwise impersonation outside of this package is possible.
	_id int
}

// ID returns the numeric identifier of the tenant.
func (t *Tenant) ID() int {
	return t._id
}

// contextKey is the private type of the keys this package stores in a
// context.Context, so they cannot collide with keys from other packages.
type contextKey int

const tenantKey contextKey = iota

// WithTenant returns a new context for the given tenant.
func WithTenant(ctx context.Context, tenant *Tenant) context.Context {
	return context.WithValue(ctx, tenantKey, tenant)
}

// GetTenant returns the tenant stored in ctx by WithTenant. The boolean is
// false when ctx carries no tenant, or when a nil *Tenant was stored; in both
// cases the returned pointer is nil.
func GetTenant(ctx context.Context) (*Tenant, bool) {
	tnt, ok := ctx.Value(tenantKey).(*Tenant)
	if !ok || tnt == nil {
		// A typed nil pointer satisfies the type assertion. Reporting it as
		// present would make callers dereference nil in tnt.ID().
		return nil, false
	}
	return tnt, true
}

// Unmarshal parses s as a base-10 tenant ID and returns the matching tenant.
// It returns an error when s is not an integer or the ID is rejected by FromID.
func Unmarshal(s string) (*Tenant, error) {
	id, err := strconv.Atoi(s)
	if err != nil {
		return nil, fmt.Errorf("bad tenant value: %q: %w", s, err)
	}
	return FromID(id)
}

// Marshal renders the ID of t as a base-10 string, the format Unmarshal reads.
func Marshal(t *Tenant) string {
	return strconv.Itoa(t._id)
}

// FromID returns the tenant with the given ID. IDs smaller than 1 are invalid
// and yield an error.
func FromID(id int) (*Tenant, error) {
	if id < 1 {
		return nil, fmt.Errorf("invalid tenant id: %d", id)
	}
	return &Tenant{_id: id}, nil
}
// contextKey is the private type of the context keys used by this package.
type contextKey int

const systemTenantKey contextKey = iota

// WithUnsafeContext marks the returned context as a system-tenant context,
// i.e. one that is allowed to query across all tenants.
// Never use this for user requests.
func WithUnsafeContext(ctx context.Context) context.Context {
	// The key doubles as the stored value; Is only checks for its presence.
	const marker = systemTenantKey
	return context.WithValue(ctx, marker, marker)
}

// Is reports whether ctx was marked by WithUnsafeContext to allow queries
// across all tenants.
func Is(ctx context.Context) bool {
	switch ctx.Value(systemTenantKey).(type) {
	case contextKey:
		return true
	default:
		return false
	}
}
27 | var TestTenantCounter atomic.Int64 28 | 29 | func NewTestContext() context.Context { 30 | return tenanttype.WithTenant(context.Background(), mustTenantFromID(int(TestTenantCounter.Inc()))) 31 | } 32 | 33 | // ResetTestTenants resets the test tenant counter that tracks the tenants 34 | // created from NewTestContext(). 35 | func ResetTestTenants() { 36 | TestTenantCounter.Store(0) 37 | } 38 | 39 | func mustTenantFromID(id int) *tenanttype.Tenant { 40 | tenant, err := tenanttype.FromID(id) 41 | if err != nil { 42 | panic(err) 43 | } 44 | return tenant 45 | } 46 | -------------------------------------------------------------------------------- /internal/trace/middleware.go: -------------------------------------------------------------------------------- 1 | package trace 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | 7 | "github.com/opentracing/opentracing-go" 8 | ) 9 | 10 | // Middleware wraps an http.Handler to extract opentracing span information from the request headers. 11 | // The opentracing.SpanContext is added to the request context, and can be retrieved by SpanContextFromContext. 
12 | func Middleware(next http.Handler) http.Handler { 13 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 14 | tracer := opentracing.GlobalTracer() 15 | spanContext, err := tracer.Extract(opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(r.Header)) 16 | if err == nil { 17 | r = r.WithContext(ContextWithSpanContext(r.Context(), spanContext)) 18 | } 19 | next.ServeHTTP(w, r) 20 | }) 21 | } 22 | 23 | type spanContextKey struct{} 24 | 25 | // SpanContextFromContext retrieves the opentracing.SpanContext set on the context by Middleware 26 | func SpanContextFromContext(ctx context.Context) opentracing.SpanContext { 27 | if v := ctx.Value(spanContextKey{}); v != nil { 28 | return v.(opentracing.SpanContext) 29 | } 30 | return nil 31 | } 32 | 33 | // ContextWithSpanContext creates a new context with the opentracing.SpanContext set 34 | func ContextWithSpanContext(ctx context.Context, sc opentracing.SpanContext) context.Context { 35 | return context.WithValue(ctx, spanContextKey{}, sc) 36 | } 37 | -------------------------------------------------------------------------------- /internal/trace/opentracing.go: -------------------------------------------------------------------------------- 1 | package trace 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/opentracing/opentracing-go" 7 | ) 8 | 9 | type key int 10 | 11 | const ( 12 | enableOpenTracingKey key = iota 13 | ) 14 | 15 | // isOpenTracingEnabled returns true if the enableOpenTracingKey context value is true. 
16 | func isOpenTracingEnabled(ctx context.Context) bool { 17 | v, ok := ctx.Value(enableOpenTracingKey).(bool) 18 | if !ok { 19 | return false 20 | } 21 | return v 22 | } 23 | 24 | func WithOpenTracingEnabled(ctx context.Context, enableOpenTracing bool) context.Context { 25 | return context.WithValue(ctx, enableOpenTracingKey, enableOpenTracing) 26 | } 27 | 28 | // GetOpenTracer returns the tracer to actually use depending on whether isOpenTracingEnabled(ctx) 29 | // returns true or false. If false, this returns the NoopTracer. 30 | func GetOpenTracer(ctx context.Context, tracer opentracing.Tracer) opentracing.Tracer { 31 | if !isOpenTracingEnabled(ctx) { 32 | return opentracing.NoopTracer{} 33 | } 34 | if tracer == nil { 35 | return opentracing.GlobalTracer() 36 | } 37 | return tracer 38 | } 39 | -------------------------------------------------------------------------------- /internal/tracer/jaeger.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "log" 5 | "reflect" 6 | 7 | "github.com/opentracing/opentracing-go" 8 | sglog "github.com/sourcegraph/log" 9 | "github.com/uber/jaeger-client-go" 10 | jaegercfg "github.com/uber/jaeger-client-go/config" 11 | jaegermetrics "github.com/uber/jaeger-lib/metrics" 12 | ) 13 | 14 | func configureJaeger(resource sglog.Resource) (opentracing.Tracer, error) { 15 | cfg, err := jaegercfg.FromEnv() 16 | cfg.ServiceName = resource.Name 17 | if err != nil { 18 | return nil, err 19 | } 20 | cfg.Tags = append( 21 | cfg.Tags, 22 | opentracing.Tag{Key: "service.version", Value: resource.Version}, 23 | opentracing.Tag{Key: "service.instance.id", Value: resource.InstanceID}, 24 | ) 25 | if reflect.DeepEqual(cfg.Sampler, &jaegercfg.SamplerConfig{}) { 26 | // Default sampler configuration for when it is not specified via 27 | // JAEGER_SAMPLER_* env vars. In most cases, this is sufficient 28 | // enough to connect to Jaeger without any env vars. 
29 | cfg.Sampler.Type = jaeger.SamplerTypeConst 30 | cfg.Sampler.Param = 1 // 1 => enabled 31 | } 32 | tracer, _, err := cfg.NewTracer( 33 | jaegercfg.Logger(&jaegerLogger{}), 34 | jaegercfg.Metrics(jaegermetrics.NullFactory), 35 | ) 36 | if err != nil { 37 | return nil, err 38 | } 39 | return tracer, nil 40 | } 41 | 42 | type jaegerLogger struct{} 43 | 44 | func (l *jaegerLogger) Error(msg string) { 45 | log.Printf("ERROR: %s", msg) 46 | } 47 | 48 | // Infof logs a message at info priority 49 | func (l *jaegerLogger) Infof(msg string, args ...any) { 50 | log.Printf(msg, args...) 51 | } 52 | -------------------------------------------------------------------------------- /internal/tracer/tracer.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "strconv" 7 | 8 | "github.com/opentracing/opentracing-go" 9 | sglog "github.com/sourcegraph/log" 10 | ) 11 | 12 | type tracerType string 13 | 14 | const ( 15 | tracerTypeNone tracerType = "none" 16 | tracerTypeJaeger tracerType = "jaeger" 17 | tracerTypeOpenTelemetry tracerType = "opentelemetry" 18 | ) 19 | 20 | func inferTracerType() tracerType { 21 | // default to disabled 22 | isJaegerDisabled, err := strconv.ParseBool(os.Getenv("JAEGER_DISABLED")) 23 | if err == nil && !isJaegerDisabled { 24 | return tracerTypeJaeger 25 | } 26 | 27 | // defaults to disabled 28 | isOpenTelemetryDisabled, err := strconv.ParseBool(os.Getenv("OPENTELEMETRY_DISABLED")) 29 | if err == nil && !isOpenTelemetryDisabled { 30 | return tracerTypeOpenTelemetry 31 | } 32 | 33 | return tracerTypeNone 34 | } 35 | 36 | // Init should only be called from main and only once 37 | // It will initialize the configured tracer, and register it as the global tracer 38 | // This MUST be the same tracer as the one used by Sourcegraph 39 | func Init(resource sglog.Resource) { 40 | var ( 41 | tt = inferTracerType() 42 | tracer opentracing.Tracer 43 | err error 44 | ) 45 | 
switch tt { 46 | case tracerTypeJaeger: 47 | tracer, err = configureJaeger(resource) 48 | if err != nil { 49 | log.Printf("failed to configure Jaeger tracer: %v", err) 50 | return 51 | } 52 | log.Printf("INFO: using Jaeger tracer") 53 | 54 | case tracerTypeOpenTelemetry: 55 | tracer, err = configureOpenTelemetry(resource) 56 | if err != nil { 57 | log.Printf("failed to configure OpenTelemetry tracer: %v", err) 58 | return 59 | } 60 | log.Printf("INFO: using OpenTelemetry tracer") 61 | } 62 | 63 | if tracer != nil { 64 | opentracing.SetGlobalTracer(tracer) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /marshal_test.go: -------------------------------------------------------------------------------- 1 | package zoekt 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "testing" 7 | "time" 8 | 9 | "github.com/google/go-cmp/cmp" 10 | ) 11 | 12 | func BenchmarkRepoList_Encode(b *testing.B) { 13 | set := genRepoList(1000) 14 | 15 | // do one write to amortize away the cost of gob registration 16 | w := &countWriter{} 17 | enc := gob.NewEncoder(w) 18 | if err := enc.Encode(set); err != nil { 19 | b.Fatal(err) 20 | } 21 | 22 | b.ResetTimer() 23 | b.ReportAllocs() 24 | 25 | b.ReportMetric(float64(w.n), "bytes") 26 | 27 | for n := 0; n < b.N; n++ { 28 | if err := enc.Encode(set); err != nil { 29 | b.Fatal(err) 30 | } 31 | } 32 | } 33 | 34 | func BenchmarkRepoList_Decode(b *testing.B) { 35 | set := genRepoList(1000) 36 | 37 | var buf bytes.Buffer 38 | if err := gob.NewEncoder(&buf).Encode(set); err != nil { 39 | b.Fatal(err) 40 | } 41 | 42 | b.ResetTimer() 43 | b.ReportAllocs() 44 | 45 | for n := 0; n < b.N; n++ { 46 | // We need to include gob.NewDecoder cost to avoid measuring encoding. 
47 | var repoBranches RepoList 48 | if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&repoBranches); err != nil { 49 | b.Fatal(err) 50 | } 51 | } 52 | } 53 | 54 | func TestRepoList_Marshal(t *testing.T) { 55 | for i := range []int{0, 1, 10, 100} { 56 | want := genRepoList(i) 57 | 58 | var buf bytes.Buffer 59 | if err := gob.NewEncoder(&buf).Encode(want); err != nil { 60 | t.Fatal(err) 61 | } 62 | 63 | var got RepoList 64 | if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&got); err != nil { 65 | t.Fatal(err) 66 | } 67 | 68 | if diff := cmp.Diff(want, &got); diff != "" { 69 | t.Fatalf("mismatch for reposmap size %d (-want +got):\n%s", i, diff) 70 | } 71 | } 72 | } 73 | 74 | func genRepoList(size int) *RepoList { 75 | m := make(ReposMap, size) 76 | indexTime := time.Now().Unix() 77 | for i := range size { 78 | m[uint32(i)] = MinimalRepoListEntry{ 79 | HasSymbols: true, 80 | IndexTimeUnix: indexTime, 81 | Branches: []RepositoryBranch{{ 82 | Name: "HEAD", 83 | Version: "c301e5c82b6e1632dce5c39902691c359559852e", 84 | }}, 85 | } 86 | } 87 | return &RepoList{ReposMap: m} 88 | } 89 | 90 | type countWriter struct { 91 | n int 92 | } 93 | 94 | func (w *countWriter) Write(b []byte) (int, error) { 95 | w.n += len(b) 96 | return len(b), nil 97 | } 98 | -------------------------------------------------------------------------------- /query/bits.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// toLower returns a newly allocated copy of in with every ASCII upper-case
// letter ('A'..'Z') replaced by its lower-case counterpart. All other bytes,
// including non-ASCII ones, are copied unchanged. The input is not modified.
func toLower(in []byte) []byte {
	const caseOffset = 'a' - 'A'

	lowered := make([]byte, len(in))
	copy(lowered, in)
	for idx := range lowered {
		if ch := lowered[idx]; 'A' <= ch && ch <= 'Z' {
			lowered[idx] = ch + caseOffset
		}
	}
	return lowered
}
Queries can be 14 | // constructed directly through query.Q objects, or by parsing a string using 15 | // query.Parse 16 | package query 17 | -------------------------------------------------------------------------------- /query/query_proto_test.go: -------------------------------------------------------------------------------- 1 | package query 2 | 3 | import ( 4 | "regexp/syntax" 5 | "testing" 6 | 7 | "github.com/RoaringBitmap/roaring" 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/grafana/regexp" 10 | ) 11 | 12 | func TestQueryRoundtrip(t *testing.T) { 13 | testCases := []Q{ 14 | &Regexp{ 15 | Regexp: regexpMustParse("foo"), 16 | FileName: true, 17 | Content: true, 18 | CaseSensitive: true, 19 | }, 20 | &Symbol{ 21 | Expr: &Language{ 22 | Language: "go", 23 | }, 24 | }, 25 | &Language{ 26 | Language: "typescript", 27 | }, 28 | &Const{ 29 | Value: true, 30 | }, 31 | &Repo{ 32 | Regexp: regexp.MustCompile("github.com/foo/bar"), 33 | }, 34 | &RepoRegexp{ 35 | Regexp: regexp.MustCompile("github.com/foo.*"), 36 | }, 37 | &BranchesRepos{ 38 | List: []BranchRepos{{ 39 | Branch: "test", 40 | Repos: func() *roaring.Bitmap { 41 | bm := roaring.New() 42 | bm.Add(3) 43 | bm.Add(34) 44 | return bm 45 | }(), 46 | }}, 47 | }, 48 | NewRepoIDs(3, 4, 5), 49 | &Branch{ 50 | Pattern: "master", 51 | Exact: true, 52 | }, 53 | NewRepoSet("test1", "test2"), 54 | NewFileNameSet("test3", "test4"), 55 | &And{ 56 | Children: []Q{ 57 | &Language{Language: "go"}, 58 | &Type{ 59 | Child: &Substring{Pattern: "interface"}, 60 | Type: TypeFileMatch, 61 | }, 62 | }, 63 | }, 64 | &Or{ 65 | Children: []Q{ 66 | &Language{Language: "go"}, 67 | &Type{ 68 | Child: &Substring{Pattern: "interface"}, 69 | Type: TypeFileMatch, 70 | }, 71 | }, 72 | }, 73 | &Not{ 74 | Child: &Language{Language: "go"}, 75 | }, 76 | &Boost{ 77 | Child: &Or{ 78 | Children: []Q{ 79 | &And{ 80 | Children: []Q{ 81 | &Substring{Pattern: "foo"}, 82 | &Substring{Pattern: "bar"}, 83 | }, 84 | }, 85 | &Substring{Pattern: "foo 
// regexpMustParse parses s with Perl syntax flags and returns the resulting
// syntax tree. It panics with the parse error if s is not a valid expression.
func regexpMustParse(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}
	panic(err)
}
// LowerRegexp returns a copy of r in which the runes of every literal and
// character-class node are lower-cased: each rune in 'A'..'Z' is replaced by
// its ASCII lower-case counterpart. For all other node kinds the
// sub-expressions are converted recursively. The input tree is not modified.
func LowerRegexp(r *syntax.Regexp) *syntax.Regexp {
	lowered := new(syntax.Regexp)
	*lowered = *r

	if r.Op != syntax.OpLiteral && r.Op != syntax.OpCharClass {
		// Interior node: rebuild the child list from lowered children.
		children := make([]*syntax.Regexp, 0, len(r.Sub))
		for _, child := range r.Sub {
			children = append(children, LowerRegexp(child))
		}
		lowered.Sub = children
		return lowered
	}

	// Leaf carrying runes: work on a private copy so r stays untouched.
	runes := make([]rune, len(r.Rune))
	copy(runes, r.Rune)
	for idx, ch := range runes {
		if 'A' <= ch && ch <= 'Z' {
			runes[idx] = ch - 'A' + 'a'
		}
	}
	lowered.Rune = runes
	return lowered
}
92 | r.Op = syntax.OpConcat 93 | r.Cap = 0 94 | r.Name = "" 95 | } 96 | 97 | for i, s := range r.Sub { 98 | r.Sub[i] = uncapture(s) 99 | } 100 | 101 | return r 102 | } 103 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? import { 2 | overlays = [ 3 | (import ./ctag-overlay.nix) 4 | ]; 5 | }}: 6 | let 7 | # pkgs.universal-ctags installs the binary as "ctags", not "universal-ctags" 8 | # like zoekt expects. 9 | universal-ctags = pkgs.writeScriptBin "universal-ctags" '' 10 | #!${pkgs.stdenv.shell} 11 | exec ${pkgs.universal-ctags}/bin/ctags "$@" 12 | ''; 13 | in 14 | pkgs.mkShell { 15 | name = "zoekt"; 16 | 17 | nativeBuildInputs = [ 18 | pkgs.go_1_23 19 | 20 | # zoekt-git-index 21 | pkgs.git 22 | 23 | # Used to index symbols 24 | universal-ctags 25 | ]; 26 | } 27 | -------------------------------------------------------------------------------- /testdata/backcompat/static_toc_v16.00000.zoekt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sourcegraph/zoekt/fd39c591438fbce188ed9dd0211aefc56bc7322a/testdata/backcompat/static_toc_v16.00000.zoekt -------------------------------------------------------------------------------- /testdata/fuzz/Fuzz_RepoList_ProtoRoundTrip/aeb560833e6a2ff8: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("A\"010000\x00\x00\x000000000000000000000000000000000000000000000000000\x00\x00\x000000000000000000000000000000000000000000000000000\x00\x00\x00\x0022222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000000000002000\x00\x00\x00\x0000\x00\x00\x00\x0022222222\x00\x00\x00\x0000\x00\x00\x00\x000200000\x00\x00\x00\x0022222222\x00\x00\x00\x0000\x00\x00\x00\x0002\x00\x00\x00\x00\x00\x00\x00\x0000000000220000000000000000000000000000000000") 3 | 
-------------------------------------------------------------------------------- /testdata/gen-shards.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # generate repo17.v17.0000.zoekt 6 | cp -r repo repo17 7 | 8 | go run ../cmd/zoekt-index -disable_ctags repo17 9 | go run ../cmd/zoekt-merge-index merge repo17_v16.00000.zoekt 10 | mv compound*zoekt repo17_v17.00000.zoekt 11 | 12 | rm -rf repo17 repo17_v16.00000.zoekt zoekt-builder-shard-log.tsv 13 | 14 | mv ./*.zoekt shards/ 15 | 16 | # generate repo2.v16.0000.zoekt 17 | go run ../cmd/zoekt-index repo2 18 | rm zoekt-builder-shard-log.tsv 19 | mv ./*.zoekt shards/ 20 | -------------------------------------------------------------------------------- /testdata/golden/TestReadSearch/repo17_v17.00000.golden: -------------------------------------------------------------------------------- 1 | { 2 | "FormatVersion": 17, 3 | "FeatureVersion": 12, 4 | "FileMatches": [ 5 | [ 6 | { 7 | "FileName": "main.go", 8 | "Repository": "repo17", 9 | "Language": "Go", 10 | "LineMatches": [ 11 | { 12 | "Line": "ZnVuYyBtYWluKCkgewo=", 13 | "LineStart": 69, 14 | "LineEnd": 83, 15 | "LineNumber": 10, 16 | "Before": null, 17 | "After": null, 18 | "FileName": false, 19 | "Score": 501, 20 | "DebugScore": "", 21 | "LineFragments": [ 22 | { 23 | "LineOffset": 0, 24 | "Offset": 69, 25 | "MatchLength": 9, 26 | "SymbolInfo": null 27 | } 28 | ] 29 | } 30 | ], 31 | "Checksum": "n9fUYqacPXg=", 32 | "Score": 5000000010 33 | } 34 | ], 35 | [ 36 | { 37 | "FileName": "main.go", 38 | "Repository": "repo17", 39 | "Language": "Go", 40 | "LineMatches": [ 41 | { 42 | "Line": "cGFja2FnZSBtYWluCg==", 43 | "LineStart": 0, 44 | "LineEnd": 13, 45 | "LineNumber": 1, 46 | "Before": null, 47 | "After": null, 48 | "FileName": false, 49 | "Score": 501, 50 | "DebugScore": "", 51 | "LineFragments": [ 52 | { 53 | "LineOffset": 0, 54 | "Offset": 0, 55 | "MatchLength": 7, 56 | "SymbolInfo": null 57 | 
} 58 | ] 59 | } 60 | ], 61 | "Checksum": "n9fUYqacPXg=", 62 | "Score": 5000000010 63 | } 64 | ], 65 | null, 66 | null 67 | ] 68 | } -------------------------------------------------------------------------------- /testdata/golden/TestReadSearch/repo2_v16.00000.golden: -------------------------------------------------------------------------------- 1 | { 2 | "FormatVersion": 16, 3 | "FeatureVersion": 12, 4 | "FileMatches": [ 5 | [ 6 | { 7 | "FileName": "main.go", 8 | "Repository": "repo2", 9 | "Language": "Go", 10 | "LineMatches": [ 11 | { 12 | "Line": "ZnVuYyBtYWluKCkgewo=", 13 | "LineStart": 33, 14 | "LineEnd": 47, 15 | "LineNumber": 7, 16 | "Before": null, 17 | "After": null, 18 | "FileName": false, 19 | "Score": 6801, 20 | "DebugScore": "", 21 | "LineFragments": [ 22 | { 23 | "LineOffset": 0, 24 | "Offset": 33, 25 | "MatchLength": 9, 26 | "SymbolInfo": null 27 | } 28 | ] 29 | } 30 | ], 31 | "Checksum": "Ju1TnQKZ6mE=", 32 | "Score": 68000000010 33 | } 34 | ], 35 | [ 36 | { 37 | "FileName": "main.go", 38 | "Repository": "repo2", 39 | "Language": "Go", 40 | "LineMatches": [ 41 | { 42 | "Line": "cGFja2FnZSBtYWluCg==", 43 | "LineStart": 0, 44 | "LineEnd": 13, 45 | "LineNumber": 1, 46 | "Before": null, 47 | "After": null, 48 | "FileName": false, 49 | "Score": 501, 50 | "DebugScore": "", 51 | "LineFragments": [ 52 | { 53 | "LineOffset": 0, 54 | "Offset": 0, 55 | "MatchLength": 7, 56 | "SymbolInfo": null 57 | } 58 | ] 59 | } 60 | ], 61 | "Checksum": "Ju1TnQKZ6mE=", 62 | "Score": 5000000010 63 | } 64 | ], 65 | null, 66 | null 67 | ] 68 | } -------------------------------------------------------------------------------- /testdata/golden/TestReadSearch/repo_v16.00000.golden: -------------------------------------------------------------------------------- 1 | { 2 | "FormatVersion": 16, 3 | "FeatureVersion": 12, 4 | "FileMatches": [ 5 | [ 6 | { 7 | "FileName": "main.go", 8 | "Repository": "repo", 9 | "Language": "Go", 10 | "LineMatches": [ 11 | { 12 | "Line": 
"ZnVuYyBtYWluKCkgewo=", 13 | "LineStart": 69, 14 | "LineEnd": 83, 15 | "LineNumber": 10, 16 | "Before": null, 17 | "After": null, 18 | "FileName": false, 19 | "Score": 501, 20 | "DebugScore": "", 21 | "LineFragments": [ 22 | { 23 | "LineOffset": 0, 24 | "Offset": 69, 25 | "MatchLength": 9, 26 | "SymbolInfo": null 27 | } 28 | ] 29 | } 30 | ], 31 | "Checksum": "n9fUYqacPXg=", 32 | "Score": 5000000010 33 | } 34 | ], 35 | [ 36 | { 37 | "FileName": "main.go", 38 | "Repository": "repo", 39 | "Language": "Go", 40 | "LineMatches": [ 41 | { 42 | "Line": "cGFja2FnZSBtYWluCg==", 43 | "LineStart": 0, 44 | "LineEnd": 13, 45 | "LineNumber": 1, 46 | "Before": null, 47 | "After": null, 48 | "FileName": false, 49 | "Score": 501, 50 | "DebugScore": "", 51 | "LineFragments": [ 52 | { 53 | "LineOffset": 0, 54 | "Offset": 0, 55 | "MatchLength": 7, 56 | "SymbolInfo": null 57 | } 58 | ] 59 | } 60 | ], 61 | "Checksum": "n9fUYqacPXg=", 62 | "Score": 5000000010 63 | } 64 | ], 65 | null, 66 | null 67 | ] 68 | } -------------------------------------------------------------------------------- /testdata/repo/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | var ( 6 | num = 5 7 | message = "hello" 8 | ) 9 | 10 | func main() { 11 | fmt.Println(message, num) 12 | } 13 | -------------------------------------------------------------------------------- /testdata/repo2/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func main() { 8 | var b, c int = 1, 2 9 | fmt.Println(b, c) 10 | 11 | fruit := "apple" 12 | fmt.Println(fruit) 13 | } 14 | -------------------------------------------------------------------------------- /testdata/search_result_1.pb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sourcegraph/zoekt/fd39c591438fbce188ed9dd0211aefc56bc7322a/testdata/search_result_1.pb -------------------------------------------------------------------------------- /testdata/shards/ctagsrepo_v16.00000.zoekt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sourcegraph/zoekt/fd39c591438fbce188ed9dd0211aefc56bc7322a/testdata/shards/ctagsrepo_v16.00000.zoekt -------------------------------------------------------------------------------- /testdata/shards/ctagsrepo_v17.00000.zoekt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sourcegraph/zoekt/fd39c591438fbce188ed9dd0211aefc56bc7322a/testdata/shards/ctagsrepo_v17.00000.zoekt -------------------------------------------------------------------------------- /testdata/shards/repo17_v17.00000.zoekt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sourcegraph/zoekt/fd39c591438fbce188ed9dd0211aefc56bc7322a/testdata/shards/repo17_v17.00000.zoekt -------------------------------------------------------------------------------- /testdata/shards/repo2_v16.00000.zoekt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sourcegraph/zoekt/fd39c591438fbce188ed9dd0211aefc56bc7322a/testdata/shards/repo2_v16.00000.zoekt -------------------------------------------------------------------------------- /testdata/shards/repo_v16.00000.zoekt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sourcegraph/zoekt/fd39c591438fbce188ed9dd0211aefc56bc7322a/testdata/shards/repo_v16.00000.zoekt -------------------------------------------------------------------------------- /web/doc.go: -------------------------------------------------------------------------------- 1 | // Licensed under 
the Apache License, Version 2.0 (the "License"); 2 | // you may not use this file except in compliance with the License. 3 | // You may obtain a copy of the License at 4 | // 5 | // http://www.apache.org/licenses/LICENSE-2.0 6 | // 7 | // Unless required by applicable law or agreed to in writing, software 8 | // distributed under the License is distributed on an "AS IS" BASIS, 9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | // See the License for the specific language governing permissions and 11 | // limitations under the License. 12 | 13 | // Package web contains the logic for spinning up a zoekt webserver. It's exposed separately 14 | // from zoekt-webserver to allow for customizing the endpoints and format templates. 15 | package web 16 | -------------------------------------------------------------------------------- /web/server_test.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | ) 8 | 9 | func TestAddLineNumbers(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | content string 13 | lineNum int 14 | isBefore bool 15 | want []lineMatch 16 | }{ 17 | { 18 | name: "empty content", 19 | content: "", 20 | lineNum: 10, 21 | isBefore: true, 22 | want: nil, 23 | }, 24 | { 25 | name: "single line before", 26 | content: "hello world", 27 | lineNum: 10, 28 | isBefore: true, 29 | want: []lineMatch{ 30 | {LineNum: 9, Content: "hello world"}, 31 | }, 32 | }, 33 | { 34 | name: "single line after", 35 | content: "hello world", 36 | lineNum: 10, 37 | isBefore: false, 38 | want: []lineMatch{ 39 | {LineNum: 11, Content: "hello world"}, 40 | }, 41 | }, 42 | { 43 | name: "multiple lines before", 44 | content: "first line\nsecond line\nthird line", 45 | lineNum: 10, 46 | isBefore: true, 47 | want: []lineMatch{ 48 | {LineNum: 7, Content: "first line"}, 49 | {LineNum: 8, Content: "second line"}, 50 | {LineNum: 
9, Content: "third line"}, 51 | }, 52 | }, 53 | { 54 | name: "multiple lines after", 55 | content: "first line\nsecond line\nthird line", 56 | lineNum: 10, 57 | isBefore: false, 58 | want: []lineMatch{ 59 | {LineNum: 11, Content: "first line"}, 60 | {LineNum: 12, Content: "second line"}, 61 | {LineNum: 13, Content: "third line"}, 62 | }, 63 | }, 64 | { 65 | name: "content with empty lines before", 66 | content: "first line\n\nthird line", 67 | lineNum: 10, 68 | isBefore: true, 69 | want: []lineMatch{ 70 | {LineNum: 7, Content: "first line"}, 71 | {LineNum: 8, Content: ""}, 72 | {LineNum: 9, Content: "third line"}, 73 | }, 74 | }, 75 | { 76 | name: "content with empty lines after", 77 | content: "first line\n\nthird line", 78 | lineNum: 10, 79 | isBefore: false, 80 | want: []lineMatch{ 81 | {LineNum: 11, Content: "first line"}, 82 | {LineNum: 12, Content: ""}, 83 | {LineNum: 13, Content: "third line"}, 84 | }, 85 | }, 86 | } 87 | 88 | for _, tt := range tests { 89 | t.Run(tt.name, func(t *testing.T) { 90 | got := AddLineNumbers(tt.content, tt.lineNum, tt.isBefore) 91 | if diff := cmp.Diff(tt.want, got); diff != "" { 92 | t.Errorf("AddLineNumbers() mismatch (-want +got):\n%s", diff) 93 | } 94 | }) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /web/trace.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/opentracing/opentracing-go" 7 | 8 | "github.com/sourcegraph/zoekt" 9 | "github.com/sourcegraph/zoekt/internal/trace" 10 | "github.com/sourcegraph/zoekt/query" 11 | ) 12 | 13 | func NewTraceAwareSearcher(s zoekt.Streamer) zoekt.Streamer { 14 | return traceAwareSearcher{Searcher: s} 15 | } 16 | 17 | // traceAwareSearcher wraps a zoekt.Searcher instance so that the tracing context item is set in the 18 | // context. 
This context item toggles on trace collection via the 19 | // github.com/sourcegraph/zoekt/internal/trace/ot package. 20 | type traceAwareSearcher struct { 21 | Searcher zoekt.Streamer 22 | } 23 | 24 | func (s traceAwareSearcher) Search( 25 | ctx context.Context, 26 | q query.Q, 27 | opts *zoekt.SearchOptions, 28 | ) (*zoekt.SearchResult, error) { 29 | ctx = trace.WithOpenTracingEnabled(ctx, opts.Trace) 30 | spanContext := trace.SpanContextFromContext(ctx) 31 | if opts.Trace && spanContext != nil { 32 | var span opentracing.Span 33 | span, ctx = opentracing.StartSpanFromContext(ctx, "zoekt.traceAwareSearcher.Search", opentracing.ChildOf(spanContext)) 34 | defer span.Finish() 35 | } 36 | return s.Searcher.Search(ctx, q, opts) 37 | } 38 | 39 | func (s traceAwareSearcher) StreamSearch( 40 | ctx context.Context, 41 | q query.Q, 42 | opts *zoekt.SearchOptions, 43 | sender zoekt.Sender, 44 | ) error { 45 | ctx = trace.WithOpenTracingEnabled(ctx, opts.Trace) 46 | spanContext := trace.SpanContextFromContext(ctx) 47 | if opts.Trace && spanContext != nil { 48 | var span opentracing.Span 49 | span, ctx = opentracing.StartSpanFromContext(ctx, "zoekt.traceAwareSearcher.StreamSearch", opentracing.ChildOf(spanContext)) 50 | defer span.Finish() 51 | } 52 | return s.Searcher.StreamSearch(ctx, q, opts, sender) 53 | } 54 | 55 | func (s traceAwareSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 56 | return s.Searcher.List(ctx, q, opts) 57 | } 58 | func (s traceAwareSearcher) Close() { s.Searcher.Close() } 59 | func (s traceAwareSearcher) String() string { return s.Searcher.String() } 60 | --------------------------------------------------------------------------------