├── .github └── workflows │ ├── build.yml │ ├── publish.yml │ ├── release.yml │ └── staging-approval-public.yml ├── .gitignore ├── .gitmodules ├── .promu.yml ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── Makefile ├── PROM_VERSION ├── README.md ├── RELEASE-PROCESS.md ├── VERSION ├── cmd ├── internal │ ├── config.go │ ├── logger.go │ ├── otlp_factory.go │ ├── start_components.go │ ├── start_components_test.go │ ├── start_leader.go │ └── start_telemetry.go └── opentelemetry-prometheus-sidecar │ ├── e2e_test.go │ ├── fdlimits_default.go │ ├── fdlimits_windows.go │ ├── main.go │ ├── main_test.go │ ├── testdata │ ├── certs │ │ ├── README.md │ │ ├── root_ca.crt │ │ ├── sidecar.test.crt │ │ └── sidecar.test.key │ └── wal │ │ └── 000000 │ ├── uname_default.go │ ├── uname_linux.go │ ├── uname_linux_int8.go │ ├── uname_linux_uint8.go │ └── validation_test.go ├── common ├── failingset.go ├── failingset_test.go ├── instruments.go └── promapi.go ├── config ├── config.go ├── config_test.go ├── example_test.go └── sidecar.example.yaml ├── docs ├── architecture.svg ├── design.md ├── img │ └── opentelemetry-prometheus-sidecar.png └── operations.md ├── go.mod ├── go.sum ├── health ├── health.go └── health_test.go ├── internal ├── otlptest │ └── otlptest.go └── promtest │ └── fake.go ├── leader ├── leader.go └── leader_test.go ├── metadata ├── metadata.go └── metadata_test.go ├── otlp ├── client.go ├── client_test.go ├── ewma.go ├── queue_manager.go ├── queue_manager_test.go ├── test_server.go ├── writer.go └── writer_test.go ├── package.go ├── prometheus ├── monitor.go ├── monitor_test.go ├── ready.go └── ready_test.go ├── retrieval ├── delay.go ├── delay_test.go ├── manager.go ├── manager_test.go ├── series_cache.go ├── series_cache_test.go ├── testdata │ ├── bearertoken.txt │ ├── ca.cer │ ├── ca.key │ ├── client.cer │ ├── client.key │ ├── server.cer │ ├── server.key │ ├── servername.cer │ └── servername.key ├── transform.go └── transform_test.go ├── snappy └── snappy.go ├── 
supervisor ├── supervisor.go └── supervisor_test.go ├── tail ├── tail.go ├── tail_test.go └── testdata │ ├── corruption │ └── 00000000 │ └── invalid-segment │ ├── 00000000 │ └── 00000001 └── telemetry ├── README.md ├── cmd └── sidecar-telemetry-test │ └── main.go ├── doevery ├── doevery.go └── doevery_test.go ├── static.go ├── telemetry.go ├── telemetry_test.go └── timer.go /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.15 20 | 21 | - name: Install Prometheus 22 | run: | 23 | export PROM_VERSION=`cat $GITHUB_WORKSPACE/PROM_VERSION` 24 | mkdir $HOME/prom 25 | cd $HOME/prom/ 26 | wget https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz 27 | tar -xvzf prometheus-${PROM_VERSION}.linux-amd64.tar.gz 28 | mkdir -p $GITHUB_WORKSPACE/bin 29 | cp prometheus-${PROM_VERSION}.linux-amd64/prom* $GITHUB_WORKSPACE/bin 30 | echo "$GITHUB_WORKSPACE/bin" >> $GITHUB_PATH 31 | 32 | - name: Build 33 | run: make 34 | 35 | - name: Test 36 | run: go test ./... 37 | 38 | - name: Test w/ race 39 | run: go test -race ./... 
40 | 41 | - name: Send data to OTLP backend 42 | uses: codeboten/github-action-to-otlp@v1 43 | with: 44 | endpoint: "ingest.lightstep.com:443" 45 | headers: "lightstep-access-token=${{ secrets.ACCESS_TOKEN }}" 46 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Build and push Docker images 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Set up Go 15 | uses: actions/setup-go@v2 16 | with: 17 | go-version: 1.15 18 | 19 | - name: Build 20 | run: make build-linux-amd64 21 | 22 | - name: Prepare 23 | id: prep 24 | run: | 25 | DOCKER_IMAGE=lightstep/opentelemetry-prometheus-sidecar 26 | VERSION=edge 27 | if [[ $GITHUB_REF == refs/tags/* ]]; then 28 | VERSION=${GITHUB_REF#refs/tags/} 29 | elif [[ $GITHUB_REF == refs/heads/* ]]; then 30 | VERSION=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g') 31 | elif [[ $GITHUB_REF == refs/pull/* ]]; then 32 | VERSION=pr-${{ github.event.number }} 33 | fi 34 | TAGS="${DOCKER_IMAGE}:${VERSION}" 35 | if [ "${{ github.event_name }}" = "push" ]; then 36 | TAGS="$TAGS,${DOCKER_IMAGE}:sha-${GITHUB_SHA::8}" 37 | fi 38 | echo ::set-output name=version::${VERSION} 39 | echo ::set-output name=tags::${TAGS} 40 | echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') 41 | 42 | - name: Set up Docker Buildx 43 | uses: docker/setup-buildx-action@v1 44 | 45 | - name: Login to DockerHub 46 | if: github.event_name != 'pull_request' 47 | uses: docker/login-action@v1 48 | with: 49 | username: ${{ secrets.DOCKER_USERNAME }} 50 | password: ${{ secrets.DOCKER_PASSWORD }} 51 | 52 | - name: Publish image 53 | uses: docker/build-push-action@v2 54 | with: 55 | context: . 
56 | push: ${{ github.event_name != 'pull_request' }} 57 | tags: ${{ steps.prep.outputs.tags }} 58 | labels: | 59 | org.opencontainers.image.source=${{ github.event.repository.html_url }} 60 | org.opencontainers.image.created=${{ steps.prep.outputs.created }} 61 | org.opencontainers.image.revision=${{ github.sha }} 62 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release and Tag 2 | 3 | 4 | on: 5 | push: 6 | tags: 7 | - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 8 | 9 | jobs: 10 | build: 11 | name: Create Release 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Create Release 16 | id: create_release 17 | uses: actions/create-release@latest 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token 20 | with: 21 | tag_name: ${{ github.ref }} 22 | release_name: Release ${{ github.ref }} 23 | body: | 24 | - ${{ github.ref }} 25 | draft: false 26 | prerelease: false 27 | 28 | - name: Set up Docker Buildx 29 | uses: docker/setup-buildx-action@v1 30 | 31 | - name: Login to DockerHub 32 | if: github.event_name != 'pull_request' 33 | uses: docker/login-action@v1 34 | with: 35 | username: ${{ secrets.DOCKER_USERNAME }} 36 | password: ${{ secrets.DOCKER_PASSWORD }} 37 | 38 | - name: Tag image 39 | run: | 40 | DOCKER_IMAGE=lightstep/opentelemetry-prometheus-sidecar 41 | docker pull ${DOCKER_IMAGE}:sha-${GITHUB_SHA::8} 42 | docker tag ${DOCKER_IMAGE}:sha-${GITHUB_SHA::8} ${DOCKER_IMAGE}:latest 43 | docker tag ${DOCKER_IMAGE}:sha-${GITHUB_SHA::8} ${DOCKER_IMAGE}:${GITHUB_REF##*/} 44 | docker push ${DOCKER_IMAGE}:latest 45 | docker push ${DOCKER_IMAGE}:${GITHUB_REF##*/} 46 | -------------------------------------------------------------------------------- 
/.github/workflows/staging-approval-public.yml: -------------------------------------------------------------------------------- 1 | name: staging-approval-public 2 | on: 3 | workflow_run: 4 | # Triger this workflow if either the "Build and push Docker images" 5 | # or the "Release and Tag" complete 6 | workflows: ["Build and push Docker images", "Release and Tag"] 7 | types: [completed] 8 | 9 | jobs: 10 | 11 | #------------------------------------------------------------------ 12 | setup: 13 | runs-on: ubuntu-latest 14 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 15 | steps: 16 | - name: determine-image-tag 17 | # Figure out whether this is a tag push or a merge to main 18 | # and set an IMAGE_TAG variable acordingly 19 | run: | 20 | 21 | GITHUB_SHA=${{ github.event.workflow_run.head_sha }} 22 | GITHUB_REF=${{ github.event.workflow_run.head_branch }} 23 | echo GITHUB_SHA is $GITHUB_SHA 24 | echo GITHUB_REF is $GITHUB_REF 25 | 26 | if [[ $GITHUB_REF == v0* ]]; then 27 | IMAGE_TAG=${GITHUB_REF} 28 | elif [[ $GITHUB_REF == main ]]; then 29 | IMAGE_TAG=sha-${GITHUB_SHA::8} 30 | fi 31 | mkdir -p share 32 | echo IMAGE_TAG is $IMAGE_TAG 33 | echo $IMAGE_TAG > share/image_tag.txt 34 | cat share/image_tag.txt 35 | 36 | - name: save-image-tag-to-file 37 | # Upload the file containing the IMAGE_TAG info so that 38 | # the next job can read it 39 | uses: actions/upload-artifact@master 40 | with: 41 | name: image-tag 42 | path: share 43 | 44 | #------------------------------------------------------------------ 45 | 46 | deploy-staging: 47 | # Job to deploy to staging 48 | needs: setup 49 | environment: 50 | name: staging 51 | runs-on: ubuntu-latest 52 | steps: 53 | - name: get-image-tag-from-file 54 | # Read the image tag from file 55 | uses: actions/download-artifact@master 56 | with: 57 | name: image-tag 58 | path: share 59 | 60 | - name: set-IMAGE_TAG-variable 61 | run: | 62 | ls -al share 63 | echo reading image_tag as $(cat share/image_tag.txt) 64 | echo 
"IMAGE_TAG=$(cat share/image_tag.txt)" >> $GITHUB_ENV 65 | echo IMAGE_TAG is $IMAGE_TAG 66 | 67 | - name: debug 68 | run: | 69 | echo IMAGE_TAG is $IMAGE_TAG 70 | 71 | - name: codefresh-pipeline-runner-prom-stack 72 | uses: codefresh-io/codefresh-pipeline-runner@v7 73 | env: 74 | PIPELINE_NAME: 'prom-stack/deploy-prometheus-to-stg' 75 | CF_API_KEY: ${{ secrets.CODEFRESH_API_TOKEN }} 76 | id: run-pipeline-ps 77 | with: 78 | args: -v OTEL-VERSION=${{ env.IMAGE_TAG }} 79 | 80 | 81 | #------------------------------------------------------------------ 82 | 83 | deploy-public: 84 | # Job to deploy to staging 85 | needs: deploy-staging 86 | environment: 87 | name: public 88 | runs-on: ubuntu-latest 89 | steps: 90 | - name: get-image-tag-from-file 91 | # Read the image tag from file 92 | uses: actions/download-artifact@master 93 | with: 94 | name: image-tag 95 | path: share 96 | 97 | - name: set-IMAGE_TAG-variable 98 | run: | 99 | ls -al share 100 | echo reading image_tag as $(cat share/image_tag.txt) 101 | echo "IMAGE_TAG=$(cat share/image_tag.txt)" >> $GITHUB_ENV 102 | echo IMAGE_TAG is $IMAGE_TAG 103 | 104 | - name: debug 105 | run: | 106 | echo IMAGE_TAG is $IMAGE_TAG 107 | 108 | - name: codefresh-pipeline-runner-prom-stack 109 | uses: codefresh-io/codefresh-pipeline-runner@v7 110 | env: 111 | PIPELINE_NAME: 'prom-stack/deploy-prometheus-to-pub' 112 | CF_API_KEY: ${{ secrets.CODEFRESH_API_TOKEN }} 113 | id: run-pipeline-ps 114 | with: 115 | args: -v OTEL-VERSION=${{ env.IMAGE_TAG }} 116 | 117 | 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # checkpoint file 2 | opentelemetry_sidecar.json 3 | # this binary 4 | /opentelemetry-prometheus-sidecar 5 | # vendor dir 6 | vendor/ 7 | data/ 8 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "upstream-fork"] 2 | path = upstream-fork 3 | url = git@github.com:Stackdriver/stackdriver-prometheus-sidecar.git 4 | -------------------------------------------------------------------------------- /.promu.yml: -------------------------------------------------------------------------------- 1 | verbose: false 2 | go: 3 | version: 1.14 4 | cgo: false 5 | repository: 6 | path: github.com/lightstep/opentelemetry-prometheus-sidecar 7 | build: 8 | binaries: 9 | - name: opentelemetry-prometheus-sidecar 10 | path: ./cmd/opentelemetry-prometheus-sidecar 11 | flags: -mod=vendor -a -tags netgo 12 | ldflags: | 13 | -X github.com/prometheus/common/version.Version={{ printf "%s-%s" "sha" (slice .Revision 0 8 )}} 14 | -X github.com/prometheus/common/version.Revision={{.Revision}} 15 | -X github.com/prometheus/common/version.Branch={{.Branch}} 16 | -X github.com/prometheus/common/version.BuildUser={{user}}@{{host}} 17 | -X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}} 18 | tarball: 19 | files: 20 | - LICENSE 21 | - NOTICE 22 | crossbuild: 23 | platforms: 24 | - linux/amd64 25 | - linux/386 26 | - darwin/amd64 27 | - darwin/386 28 | - windows/amd64 29 | - windows/386 30 | - freebsd/amd64 31 | - freebsd/386 32 | - openbsd/amd64 33 | - openbsd/386 34 | - netbsd/amd64 35 | - netbsd/386 36 | - dragonfly/amd64 37 | - linux/arm 38 | - linux/arm64 39 | - freebsd/arm 40 | # Temporarily deactivated as golang.org/x/sys does not have syscalls 41 | # implemented for that os/platform combination. 
42 | #- openbsd/arm 43 | #- linux/mips64 44 | #- linux/mips64le 45 | - netbsd/arm 46 | - linux/ppc64 47 | - linux/ppc64le 48 | 49 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/distroless/static:latest 2 | LABEL maintainer "Lightstep Engineering " 3 | 4 | COPY opentelemetry-prometheus-sidecar /bin/opentelemetry-prometheus-sidecar 5 | 6 | ENTRYPOINT [ "/bin/opentelemetry-prometheus-sidecar" ] 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The Prometheus Authors 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | # Ensure GOBIN is not set during build so that promu is installed to the correct path 15 | unexport GOBIN 16 | 17 | GO ?= go 18 | GOFMT ?= $(GO)fmt 19 | FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH))) 20 | GOHOSTOS ?= $(shell $(GO) env GOHOSTOS) 21 | GOHOSTARCH ?= $(shell $(GO) env GOHOSTARCH) 22 | 23 | # Enforce Go modules support just in case the directory is inside GOPATH (and for Travis CI). 24 | GO111MODULE := on 25 | # Always use the local vendor/ directory to satisfy the dependencies. This is required by `promu`. 
26 | GOOPTS := $(GOOPTS) -mod=vendor 27 | 28 | PROMU := $(FIRST_GOPATH)/bin/promu 29 | STATICCHECK := $(FIRST_GOPATH)/bin/staticcheck 30 | GOVERALLS := $(FIRST_GOPATH)/bin/goveralls 31 | pkgs = ./... 32 | 33 | ifeq (arm, $(GOHOSTARCH)) 34 | GOHOSTARM ?= $(shell GOARM= $(GO) env GOARM) 35 | GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)v$(GOHOSTARM) 36 | else 37 | GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH) 38 | endif 39 | 40 | PROMU_VERSION ?= 0.12.0 41 | PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz 42 | 43 | PREFIX ?= $(shell pwd) 44 | BIN_DIR ?= $(shell pwd) 45 | # Private repo. 46 | DOCKER_IMAGE_NAME ?= opentelemetry-prometheus-sidecar 47 | DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) 48 | 49 | ifdef DEBUG 50 | bindata_flags = -debug 51 | endif 52 | 53 | all: vendor build test 54 | 55 | style: 56 | @echo ">> checking code style" 57 | @! $(GOFMT) -d $(shell find . -path ./vendor -prune -o -name '*.go' -print) | grep '^' 58 | 59 | deps: 60 | @echo ">> getting dependencies" 61 | $(GO) mod download 62 | 63 | vendor: 64 | @echo ">> building vendor dir" 65 | $(GO) mod vendor 66 | 67 | test-short: 68 | @echo ">> running short tests" 69 | GO111MODULE=$(GO111MODULE) $(GO) test -short $(GOOPTS) $(pkgs) 70 | 71 | test: 72 | @echo ">> running all tests" 73 | GO111MODULE=$(GO111MODULE) $(GO) test $(GOOPTS) $(pkgs) 74 | GO111MODULE=$(GO111MODULE) $(GO) test -race $(GOOPTS) $(pkgs) 75 | 76 | cover: 77 | @echo ">> running all tests with coverage" 78 | GO111MODULE=$(GO111MODULE) $(GO) test -coverprofile=coverage.out $(GOOPTS) $(pkgs) 79 | 80 | format: 81 | @echo ">> formatting code" 82 | # Replace gofmt call once we bump to a more recent Go that supports `-mod=vendor`, probably 1.14. 83 | # GO111MODULE=$(GO111MODULE) $(GO) fmt $(GOOPTS) $(pkgs) 84 | # Avoid formatting anything under vendor/. 85 | $(GOFMT) -l -w $(shell find . 
-path ./vendor -prune -o -name '*.go' -print) 86 | 87 | vet: 88 | @echo ">> vetting code" 89 | GO111MODULE=$(GO111MODULE) $(GO) vet $(GOOPTS) $(pkgs) 90 | 91 | # TODO: Reenable staticcheck after removing deprecation warnings. 92 | staticcheck: $(STATICCHECK) 93 | @echo ">> running staticcheck" 94 | $(STATICCHECK) $(pkgs) 95 | 96 | goveralls: cover $(GOVERALLS) 97 | ifndef COVERALLS_TOKEN 98 | $(error COVERALLS_TOKEN is undefined, follow https://docs.coveralls.io/go to create one and go to https://coveralls.io to retrieve existing ones) 99 | endif 100 | @echo ">> running goveralls" 101 | $(GOVERALLS) -coverprofile=coverage.out -service=travis-ci -repotoken "${COVERALLS_TOKEN}" 102 | 103 | build: promu vendor 104 | @echo ">> building binaries" 105 | GO111MODULE=$(GO111MODULE) $(PROMU) build --prefix $(PREFIX) -v 106 | 107 | build-linux-amd64: promu vendor 108 | @echo ">> building linux amd64 binaries" 109 | @GO111MODULE=$(GO111MODULE) GOOS=linux GOARCH=amd64 $(PROMU) build --prefix $(PREFIX) -v 110 | 111 | tarball: promu 112 | @echo ">> building release tarball" 113 | @$(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR) 114 | 115 | docker: build-linux-amd64 116 | @echo ">> building docker image" 117 | docker build -t "$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" . 118 | 119 | assets: 120 | @echo ">> writing assets" 121 | $(GO) get -u github.com/jteeuwen/go-bindata/... 122 | go-bindata $(bindata_flags) -pkg ui -o web/ui/bindata.go -ignore '(.*\.map|bootstrap\.js|bootstrap-theme\.css|bootstrap\.css)' web/ui/templates/... web/ui/static/... 123 | $(GO) fmt ./web/ui 124 | 125 | promu: 126 | @echo ">> fetching promu" 127 | $(eval PROMU_TMP := $(shell mktemp -d)) 128 | # TODO this download is expensive, can we use a docker image of promu? 
129 | curl -s -L $(PROMU_URL) | tar -xvzf - -C $(PROMU_TMP) 130 | mkdir -p $(FIRST_GOPATH)/bin 131 | cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu 132 | rm -r $(PROMU_TMP) 133 | 134 | $(FIRST_GOPATH)/bin/staticcheck: 135 | GOOS= GOARCH= $(GO) get -u honnef.co/go/tools/cmd/staticcheck 136 | 137 | $(FIRST_GOPATH)/bin/goveralls: 138 | GOOS= GOARCH= $(GO) get -u github.com/mattn/goveralls 139 | 140 | .PHONY: all style deps format build test vendor vet assets tarball docker promu staticcheck $(FIRST_GOPATH)/bin/staticcheck goveralls $(FIRST_GOPATH)/bin/goveralls 141 | -------------------------------------------------------------------------------- /PROM_VERSION: -------------------------------------------------------------------------------- 1 | 2.24.1 2 | -------------------------------------------------------------------------------- /RELEASE-PROCESS.md: -------------------------------------------------------------------------------- 1 | # Release Process 2 | 3 | 1. Once a change has been pushed to `main`, the [publish GitHub action](https://github.com/lightstep/opentelemetry-prometheus-sidecar/blob/main/.github/workflows/publish.yml) automatically publishes a new Docker image. See an example [here](https://github.com/lightstep/opentelemetry-prometheus-sidecar/actions/runs/654707395). 4 | 2. Validate the changes by testing the new image. 5 | 3. Update [VERSION](https://github.com/lightstep/opentelemetry-prometheus-sidecar/blob/main/VERSION) and [CHANGELOG.md](https://github.com/lightstep/opentelemetry-prometheus-sidecar/blob/main/CHANGELOG.md) to the updated version. 6 | 4. Create a pull request for the update to the new version 7 | 5. When the pull request is merged, create a tag for the new release. This will trigger the [release GitHub action](https://github.com/lightstep/opentelemetry-prometheus-sidecar/blob/main/.github/workflows/release.yml) which tags the build with the correct version number. 
8 | `git tag v0.1.2 && git push --tag` 9 | 6. Copy and paste the CHANGELOG section for the new version into the [release](https://github.com/lightstep/opentelemetry-prometheus-sidecar/releases/tag/v0.19.0). 10 | 11 | ### Automated deployment to Lightstep staging and public environments 12 | Both the publish GitHub Action and the release GitHub Action initiate the [staging-approval-public GitHub Action](https://github.com/lightstep/opentelemetry-prometheus-sidecar/actions/workflows/staging-approval-public.yml). If the initiation is from the publish GitHub Action, the OTEL sidecar image tag will be set to the commit SHA; if the initiation is from the release GitHub Action, the OTEL sidecar image tag will be set to the tag. 13 | 14 | 15 | The `staging-approval-public` GitHub Action initiates deployment to staging (passing in the relevant otel-sidecar image tag) and then waits for approval before deploying to public (see [Approval for deployment to public](#approval-for-deployment-to-public) below). The deployments themselves are carried out by the Codefresh pipelines `deploy-prometheus-to-stg` and `deploy-prometheus-to-pub` listed under [this Codefresh Project](https://g.codefresh.io/projects/prom-stack/edit/pipelines/?projectId=60affcb1860d2d30404b2317). 16 | 17 | * A video walkthrough can be found [here](https://lightstep.atlassian.net/wiki/spaces/EPD/pages/2558689283/Prometheus#Releasing-the-OTEL-Prometheus-sidecar) 18 | 19 | ### Approval for deployment to public 20 | The approval is implemented using a GitHub Environment protection rule [here](https://github.com/lightstep/opentelemetry-prometheus-sidecar/settings/environments). 
Owners of this repo can modify the approvers list by navigating to `Settings > Enviroments > public` 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.27.0 2 | -------------------------------------------------------------------------------- /cmd/internal/config.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "github.com/go-kit/kit/log" 5 | "github.com/lightstep/opentelemetry-prometheus-sidecar/common" 6 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 7 | "github.com/lightstep/opentelemetry-prometheus-sidecar/leader" 8 | "github.com/lightstep/opentelemetry-prometheus-sidecar/metadata" 9 | "github.com/lightstep/opentelemetry-prometheus-sidecar/otlp" 10 | "github.com/lightstep/opentelemetry-prometheus-sidecar/prometheus" 11 | "github.com/prometheus/prometheus/pkg/labels" 12 | ) 13 | 14 | type SidecarConfig struct { 15 | ClientFactory otlp.StorageClientFactory 16 | Monitor *prometheus.Monitor 17 | Logger log.Logger 18 | 19 | // InstanceId is a unique identifer for this process. 
20 | InstanceId string 21 | Matchers [][]*labels.Matcher 22 | MetricRenames map[string]string 23 | MetadataCache *metadata.Cache 24 | 25 | FailingReporter common.FailingReporter 26 | 27 | LeaderCandidate leader.Candidate 28 | 29 | config.MainConfig 30 | } 31 | -------------------------------------------------------------------------------- /cmd/internal/logger.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "github.com/go-kit/kit/log" 5 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 6 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 7 | "github.com/prometheus/common/promlog" 8 | ) 9 | 10 | func NewLogger(cfg config.MainConfig, isSupervisor bool) log.Logger { 11 | vlevel := cfg.LogConfig.Verbose 12 | if cfg.LogConfig.Level == "debug" { 13 | vlevel++ 14 | } 15 | 16 | if vlevel > 0 { 17 | telemetry.SetVerboseLevel(vlevel) 18 | } 19 | 20 | var plc promlog.Config 21 | plc.Level = &promlog.AllowedLevel{} 22 | plc.Format = &promlog.AllowedFormat{} 23 | plc.Level.Set(cfg.LogConfig.Level) 24 | plc.Format.Set(cfg.LogConfig.Format) 25 | 26 | logger := promlog.New(&plc) 27 | if isSupervisor { 28 | logger = log.With(logger, "supervisor", "true") 29 | } 30 | return logger 31 | } 32 | -------------------------------------------------------------------------------- /cmd/internal/otlp_factory.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "github.com/lightstep/opentelemetry-prometheus-sidecar/otlp" 5 | ) 6 | 7 | type otlpClientFactory otlp.ClientConfig 8 | 9 | var _ otlp.StorageClientFactory = otlpClientFactory{} 10 | 11 | func NewOTLPClientFactory(cc otlp.ClientConfig) otlp.StorageClientFactory { 12 | return otlpClientFactory(cc) 13 | } 14 | 15 | func (s otlpClientFactory) New() otlp.StorageClient { 16 | return otlp.NewClient(otlp.ClientConfig(s)) 17 | } 18 | 19 | func (s 
otlpClientFactory) Name() string { 20 | return s.URL.String() 21 | } 22 | -------------------------------------------------------------------------------- /cmd/internal/start_components.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "context" 5 | "strings" 6 | 7 | "github.com/go-kit/kit/log" 8 | "github.com/go-kit/kit/log/level" 9 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 10 | "github.com/lightstep/opentelemetry-prometheus-sidecar/otlp" 11 | "github.com/lightstep/opentelemetry-prometheus-sidecar/retrieval" 12 | "github.com/lightstep/opentelemetry-prometheus-sidecar/tail" 13 | "github.com/oklog/run" 14 | "github.com/prometheus/prometheus/pkg/labels" 15 | "github.com/prometheus/prometheus/tsdb/wal" 16 | ) 17 | 18 | // externalLabelPrefix is a non-standard convention for indicating 19 | // external labels in the Prometheus data model, which are not 20 | // semantically defined in OTel, as recognized by Lightstep. 21 | const externalLabelPrefix = "__external_" 22 | 23 | // createPrimaryDestinationResourceLabels returns the OTLP resources 24 | // to use for the primary destination. 25 | func createPrimaryDestinationResourceLabels(svcInstanceId string, externalLabels labels.Labels, extraLabels map[string]string) labels.Labels { 26 | // TODO: Enable and test the following line, as https://github.com/lightstep/opentelemetry-prometheus-sidecar/issues/44 27 | // has been merged. 
28 | // extraLabels[externalLabelPrefix+string(semconv.ServiceInstanceIDKey)] 29 | // = svcInstanceId 30 | 31 | allLabels := make(map[string]string) 32 | for _, label := range externalLabels { 33 | allLabels[externalLabelPrefix+label.Name] = label.Value 34 | } 35 | for name, value := range extraLabels { 36 | allLabels[name] = value 37 | } 38 | 39 | return labels.FromMap(allLabels) 40 | } 41 | 42 | func NewTailer(ctx context.Context, scfg SidecarConfig) (*tail.Tailer, error) { 43 | return tail.Tail( 44 | ctx, 45 | log.With(scfg.Logger, "component", "wal_reader"), 46 | scfg.Prometheus.WAL, 47 | scfg.Monitor, 48 | ) 49 | } 50 | 51 | func StartComponents(ctx context.Context, scfg SidecarConfig, tailer tail.WalTailer, startOffset int) error { 52 | var err error 53 | attempts := 0 54 | currentSegment := 0 55 | for { 56 | currentSegment, err = runComponents(ctx, scfg, tailer, startOffset) 57 | if err != nil && attempts < config.DefaultMaxRetrySkipSegments && strings.Contains(err.Error(), tail.ErrSkipSegment.Error()) { 58 | _ = tailer.Close() 59 | _ = retrieval.SaveProgressFile(scfg.Prometheus.WAL, startOffset) 60 | tailer, err = NewTailer(ctx, scfg) 61 | if err != nil { 62 | _ = level.Error(scfg.Logger).Log("msg", "tailing WAL failed", "err", err) 63 | break 64 | } 65 | attempts += 1 66 | // The following check is to ensure that if a sidecar error'd on 67 | // a truncated segment and the truncation was *not* due to a checkpoint, 68 | // that truncated segment is skipped when the reader is restart. Otherwise 69 | // the reader will reset nextSegment to the next segment after the 70 | // checkpoint and hit the same truncated file. NOTE: if the truncation 71 | // was caused by a checkpoint, we shouldn't do anything and let the 72 | // reader continue on. 
73 | // 74 | // NOTE: this case *should* never happen 75 | if currentSegment > tailer.CurrentSegment() { 76 | _ = level.Warn(scfg.Logger).Log("msg", "unexpected segment truncation", "currentSegment", err, "tailer.CurrentSegment", tailer.CurrentSegment()) 77 | tailer.SetNextSegment(currentSegment + 1) 78 | startOffset = currentSegment * wal.DefaultSegmentSize 79 | } 80 | 81 | continue 82 | } 83 | break 84 | } 85 | return err 86 | } 87 | 88 | func runComponents(ctx context.Context, scfg SidecarConfig, tailer tail.WalTailer, startOffset int) (int, error) { 89 | // Run two inter-dependent components: 90 | // (1) Prometheus reader 91 | // (2) Queue manager 92 | // TODO: Replace this with x/sync/errgroup 93 | currentSegment := 0 94 | queueManager, err := otlp.NewQueueManager( 95 | log.With(scfg.Logger, "component", "queue_manager"), 96 | scfg.QueueConfig(), 97 | scfg.Destination.Timeout.Duration, 98 | scfg.ClientFactory, 99 | tailer, 100 | retrieval.LabelsToResource(createPrimaryDestinationResourceLabels( 101 | scfg.InstanceId, 102 | scfg.Monitor.GetGlobalConfig().ExternalLabels, 103 | scfg.Destination.Attributes)), 104 | ) 105 | if err != nil { 106 | _ = level.Error(scfg.Logger).Log("msg", "creating queue manager failed", "err", err) 107 | return currentSegment, err 108 | } 109 | 110 | prometheusReader := retrieval.NewPrometheusReader( 111 | log.With(scfg.Logger, "component", "prom_wal"), 112 | scfg.Prometheus.WAL, 113 | tailer, 114 | scfg.Matchers, 115 | scfg.MetricRenames, 116 | scfg.MetadataCache, 117 | queueManager, 118 | scfg.OpenTelemetry.MetricsPrefix, 119 | scfg.Prometheus.MaxPointAge.Duration, 120 | scfg.Monitor.GetScrapeConfig(), 121 | scfg.FailingReporter, 122 | scfg.LeaderCandidate, 123 | ) 124 | 125 | var g run.Group 126 | { 127 | g.Add( 128 | func() error { 129 | _ = level.Info(scfg.Logger).Log("msg", "starting Prometheus reader", "segment", startOffset/wal.DefaultSegmentSize) 130 | return prometheusReader.Run(ctx, startOffset) 131 | }, 132 | func(err 
error) { 133 | // Prometheus reader needs to be stopped before closing the TSDB 134 | // so that it doesn't try to write samples to a closed storage. 135 | // See the use of `stopCh` below to explain how this works. 136 | _ = level.Info(scfg.Logger).Log("msg", "stopping Prometheus reader") 137 | }, 138 | ) 139 | } 140 | { 141 | stopCh := make(chan struct{}) 142 | g.Add( 143 | func() error { 144 | if err := queueManager.Start(); err != nil { 145 | return err 146 | } 147 | _ = level.Info(scfg.Logger).Log("msg", "starting OpenTelemetry writer") 148 | <-stopCh 149 | return nil 150 | }, 151 | func(err error) { 152 | if err := queueManager.Stop(); err != nil { 153 | _ = level.Error(scfg.Logger).Log( 154 | "msg", "stopping OpenTelemetry writer", 155 | "err", err, 156 | ) 157 | } 158 | _ = level.Info(scfg.Logger).Log("msg", "stopping OpenTelemetry writer") 159 | close(stopCh) 160 | }, 161 | ) 162 | } 163 | 164 | if err := g.Run(); err != nil { 165 | _ = level.Error(scfg.Logger).Log("msg", "run loop error", "err", err) 166 | return prometheusReader.CurrentSegment(), err 167 | } 168 | return prometheusReader.CurrentSegment(), nil 169 | } 170 | -------------------------------------------------------------------------------- /cmd/internal/start_components_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "os" 7 | "testing" 8 | 9 | "github.com/go-kit/kit/log" 10 | "github.com/lightstep/opentelemetry-prometheus-sidecar/prometheus" 11 | "github.com/lightstep/opentelemetry-prometheus-sidecar/tail" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | type fakeTailer struct { 16 | readError error 17 | sizeError error 18 | next int 19 | } 20 | 21 | func (t *fakeTailer) Size() (int, error) { 22 | return 0, t.sizeError 23 | } 24 | 25 | func (t *fakeTailer) Next() { 26 | } 27 | 28 | func (t *fakeTailer) Offset() int { 29 | return 0 30 | } 31 | 32 | func (t *fakeTailer) 
Close() error { 33 | return nil 34 | } 35 | 36 | func (t *fakeTailer) CurrentSegment() int { 37 | return t.next 38 | } 39 | 40 | func (t *fakeTailer) Read(b []byte) (int, error) { 41 | return 0, t.readError 42 | } 43 | 44 | func (t *fakeTailer) SetNextSegment(next int) { 45 | t.next = next 46 | } 47 | 48 | var _ tail.WalTailer = &fakeTailer{} 49 | 50 | func TestStartComponents(t *testing.T) { 51 | // test that we only loop for err skip segment 52 | logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) 53 | scfg := SidecarConfig{} 54 | scfg.Monitor = &prometheus.Monitor{} 55 | scfg.Logger = logger 56 | ctx := context.Background() 57 | tailer := fakeTailer{ 58 | sizeError: errors.New("failed to get size"), 59 | readError: errors.New("failed to read"), 60 | } 61 | err := StartComponents(ctx, scfg, &tailer, 0) 62 | require.Error(t, err) 63 | 64 | tailer = fakeTailer{ 65 | sizeError: tail.ErrSkipSegment, 66 | readError: errors.New("failed to read"), 67 | } 68 | err = StartComponents(ctx, scfg, &tailer, 0) 69 | require.Error(t, err) 70 | 71 | tailer = fakeTailer{ 72 | readError: errors.New("failed to read"), 73 | } 74 | err = StartComponents(ctx, scfg, &tailer, 0) 75 | require.Error(t, err) 76 | 77 | tailer = fakeTailer{ 78 | readError: tail.ErrSkipSegment, 79 | } 80 | err = StartComponents(ctx, scfg, &tailer, 0) 81 | require.Error(t, err) 82 | 83 | } 84 | -------------------------------------------------------------------------------- /cmd/internal/start_leader.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | "strings" 8 | "time" 9 | 10 | "github.com/go-kit/kit/log" 11 | "github.com/go-kit/kit/log/level" 12 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 13 | "github.com/lightstep/opentelemetry-prometheus-sidecar/leader" 14 | "github.com/pkg/errors" 15 | ) 16 | 17 | const ( 18 | // Conventions. Do we need to configure these? 
// cleanName normalizes name for use as a leader-election lock name or ID:
// every "/" and "_" becomes "-", and a result longer than 64 bytes is cut
// down to its trailing 64 bytes.
//
// If that cut lands inside a multi-byte UTF-8 sequence, the orphaned
// continuation bytes at the front are dropped too. The result may then be
// slightly shorter than 64 bytes, but it never starts with a partial rune.
func cleanName(name string) string {
	const maxLen = 64

	name = strings.ReplaceAll(name, "/", "-")
	name = strings.ReplaceAll(name, "_", "-")
	if len(name) <= maxLen {
		return name
	}

	// Keep the tail: it is the most specific part of a path-like name.
	name = name[len(name)-maxLen:]

	// UTF-8 continuation bytes have the bit pattern 10xxxxxx.
	for len(name) > 0 && name[0]&0xC0 == 0x80 {
		name = name[1:]
	}
	return name
}
| "net" 7 | "net/url" 8 | "time" 9 | 10 | "github.com/go-kit/kit/log" 11 | "github.com/go-kit/kit/log/level" 12 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 13 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 14 | "github.com/prometheus/prometheus/pkg/labels" 15 | semconv "go.opentelemetry.io/otel/semconv/v1.4.0" 16 | ) 17 | 18 | type ShutdownFunc func(context.Context) 19 | 20 | func StartTelemetry(scfg SidecarConfig, defaultSvcName string, isSuper bool, externalLabels labels.Labels) *telemetry.Telemetry { 21 | diagConfig := scfg.Diagnostics 22 | 23 | if scfg.DisableDiagnostics { 24 | return telemetry.InternalOnly() 25 | } 26 | 27 | if diagConfig.Endpoint == "" { 28 | // Create a copy, as we adjust the headers/attributes. 29 | diagConfig = scfg.Destination.Copy() 30 | } 31 | 32 | if diagConfig.Endpoint == "" { 33 | return telemetry.InternalOnly() 34 | } 35 | 36 | // reportingPeriod should be faster than the health check period, 37 | // because we are using metrics data for internal health checking. 
38 | reportingPeriod := scfg.Admin.HealthCheckPeriod.Duration / 2 39 | 40 | return startTelemetry(diagConfig, reportingPeriod, defaultSvcName, scfg.InstanceId, isSuper, externalLabels, scfg.Logger) 41 | } 42 | 43 | func startTelemetry(diagConfig config.OTLPConfig, reportingPeriod time.Duration, defaultSvcName string, svcInstanceId string, isSuper bool, externalLabels labels.Labels, logger log.Logger) *telemetry.Telemetry { 44 | endpoint, _ := url.Parse(diagConfig.Endpoint) 45 | hostport := endpoint.Hostname() 46 | if len(endpoint.Port()) > 0 { 47 | hostport = net.JoinHostPort(hostport, endpoint.Port()) 48 | } 49 | 50 | insecure := endpoint.Scheme == "http" 51 | metricsHostport := hostport 52 | spanHostport := hostport 53 | 54 | svcName := diagConfig.Attributes[string(semconv.ServiceNameKey)] 55 | if svcName == "" { 56 | svcName = defaultSvcName 57 | } 58 | 59 | agentName := config.AgentSecondaryValue 60 | if isSuper { 61 | // Disable metrics in the supervisor 62 | metricsHostport = "" 63 | svcName = svcName + "-supervisor" 64 | agentName = config.AgentSupervisorValue 65 | } else { 66 | // Disable spans in the main process 67 | spanHostport = "" 68 | } 69 | 70 | diagConfig.Headers[config.AgentKey] = agentName 71 | diagConfig.Attributes[string(semconv.ServiceNameKey)] = svcName 72 | diagConfig.Attributes[string(semconv.ServiceInstanceIDKey)] = svcInstanceId 73 | 74 | // No need to add an external-label-prefix for the secondary target. 75 | for _, label := range externalLabels { 76 | diagConfig.Attributes[label.Name] = label.Value 77 | } 78 | 79 | // No need to log this for the supervisor case. 80 | if !isSuper { 81 | level.Info(logger).Log( 82 | "msg", "configuring sidecar diagnostics", 83 | "attributes", fmt.Sprintf("%s", diagConfig.Attributes), 84 | ) 85 | } 86 | 87 | // TODO: Configure trace batching interval. 
88 | 89 | return telemetry.ConfigureOpentelemetry( 90 | telemetry.WithLogger(logger), 91 | telemetry.WithSpanExporterEndpoint(spanHostport), 92 | telemetry.WithSpanExporterInsecure(insecure), 93 | telemetry.WithMetricsExporterEndpoint(metricsHostport), 94 | telemetry.WithMetricsExporterInsecure(insecure), 95 | telemetry.WithHeaders(diagConfig.Headers), 96 | telemetry.WithResourceAttributes(diagConfig.Attributes), 97 | telemetry.WithExportTimeout(diagConfig.Timeout.Duration), 98 | telemetry.WithMetricReportingPeriod(reportingPeriod), 99 | telemetry.WithCompressor(diagConfig.Compression), 100 | ) 101 | } 102 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/fdlimits_default.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
// FdLimits returns the soft and hard limits for file descriptors, formatted
// as "(soft=N, hard=M)". The process exits if the limits cannot be read.
func FdLimits() string {
	var flimit syscall.Rlimit
	if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &flimit); err != nil {
		// Report the cause; a bare "Error!" gives the operator nothing to
		// act on.
		log.Fatalf("reading file descriptor limits: %v", err)
	}
	return fmt.Sprintf("(soft=%d, hard=%d)", flimit.Cur, flimit.Max)
}
4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package main 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "net/http" 20 | "os" 21 | "os/exec" 22 | "testing" 23 | "time" 24 | 25 | "github.com/lightstep/opentelemetry-prometheus-sidecar/internal/promtest" 26 | "github.com/stretchr/testify/require" 27 | ) 28 | 29 | // Note: the tests would be cleaner in this package if 30 | // var bout, berr bytes.Buffer 31 | // cmd.Stdout = &bout 32 | // cmd.Stderr = &berr 33 | // followed by 34 | // t.Logf("stdout: %v\n", bout.String()) 35 | // t.Logf("stderr: %v\n", berr.String()) 36 | // and assertions was replaced with a helper class. When the test 37 | // times out unexpectedly, this information is not being printed, 38 | // which the helper could fix with a pipe. 39 | 40 | func TestMain(m *testing.M) { 41 | if os.Getenv("RUN_MAIN") == "" { 42 | // Run the test directly. 43 | os.Exit(m.Run()) 44 | } 45 | 46 | main() 47 | } 48 | 49 | func (ts *testServer) runPrometheusService(cfg promtest.Config) { 50 | fp := promtest.NewFakePrometheus(cfg) 51 | address := "0.0.0.0:19093" 52 | server := &http.Server{ 53 | Addr: address, 54 | Handler: fp.ServeMux(), 55 | } 56 | ctx, cancel := context.WithCancel(context.Background()) 57 | 58 | ts.stops <- cancel 59 | 60 | go server.ListenAndServe() 61 | 62 | go func() { 63 | <-ctx.Done() 64 | server.Shutdown(ctx) 65 | }() 66 | } 67 | 68 | // As soon as prometheus starts responding to http request should be able to accept Interrupt signals for a gracefull shutdown. 
69 | func TestStartupInterrupt(t *testing.T) { 70 | if testing.Short() { 71 | t.Skip("skipping test in short mode.") 72 | } 73 | 74 | cmd := exec.Command( 75 | os.Args[0], 76 | append(e2eTestMainCommonFlags, 77 | "--prometheus.wal=testdata/wal", 78 | "--log.level=debug", // The tests below depend on debug logs 79 | )...) 80 | 81 | cmd.Env = append(os.Environ(), "RUN_MAIN=1") 82 | var bout, berr bytes.Buffer 83 | cmd.Stdout = &bout 84 | cmd.Stderr = &berr 85 | err := cmd.Start() 86 | if err != nil { 87 | t.Errorf("execution error: %v", err) 88 | return 89 | } 90 | 91 | done := make(chan error) 92 | go func() { 93 | done <- cmd.Wait() 94 | }() 95 | 96 | var startedOk bool 97 | var stoppedErr error 98 | 99 | Loop: 100 | // This loop sleeps allows least 10 seconds to pass. 101 | for x := 0; x < 10; x++ { 102 | // Waits for the sidecar's /-/ready handler 103 | if resp, err := http.Get(e2eReadyURL); err == nil && resp.StatusCode/100 == 2 { 104 | startedOk = true 105 | cmd.Process.Signal(os.Interrupt) 106 | select { 107 | case stoppedErr = <-done: 108 | break Loop 109 | case <-time.After(10 * time.Second): 110 | } 111 | break Loop 112 | } else { 113 | select { 114 | case stoppedErr = <-done: 115 | break Loop 116 | default: // try again 117 | } 118 | } 119 | time.Sleep(time.Second) 120 | } 121 | 122 | if !startedOk { 123 | t.Errorf("opentelemetry-prometheus-sidecar didn't start in the specified timeout") 124 | return 125 | } 126 | if err := cmd.Process.Kill(); err == nil { 127 | t.Errorf("opentelemetry-prometheus-sidecar didn't shutdown after sending the Interrupt signal") 128 | } 129 | const expected = "Prometheus is not ready: context canceled" 130 | require.Error(t, stoppedErr) 131 | require.Contains(t, stoppedErr.Error(), "exit status 1") 132 | 133 | // Because the fake endpoint was started after the start of 134 | // the test, we should see some gRPC warnings the connection up 135 | // until --startup.timeout takes effect. 
136 | require.Contains(t, berr.String(), expected) 137 | 138 | // The process should have been interrupted. 139 | require.Contains(t, berr.String(), "received SIGTERM, exiting") 140 | 141 | // The selftest was interrupted. 142 | require.Contains(t, berr.String(), "selftest failed, not starting") 143 | } 144 | 145 | func TestMainExitOnFailure(t *testing.T) { 146 | cmd := exec.Command( 147 | os.Args[0], 148 | "--totally-bogus-flag-name=testdata/wal", 149 | ) 150 | 151 | cmd.Env = append(os.Environ(), "RUN_MAIN=1") 152 | var berr bytes.Buffer 153 | cmd.Stderr = &berr 154 | require.NoError(t, cmd.Start()) 155 | 156 | require.Error(t, cmd.Wait()) 157 | require.Contains(t, berr.String(), "totally-bogus-flag-name") 158 | } 159 | 160 | func TestParseFilters(t *testing.T) { 161 | for _, tt := range []struct { 162 | name string 163 | filtersets []string 164 | wantMatchers int 165 | }{ 166 | {"just filtersets", []string{"metric_name"}, 1}, 167 | {"no filtersets", []string{}, 0}, 168 | } { 169 | t.Run(tt.name, func(t *testing.T) { 170 | // Test success cases. 171 | parsed, err := parseFilters(tt.filtersets) 172 | if err != nil { 173 | t.Fatal(err) 174 | } 175 | if len(parsed) != tt.wantMatchers { 176 | t.Fatalf("expected %d matchers; got %d", tt.wantMatchers, len(parsed)) 177 | } 178 | }) 179 | } 180 | 181 | // Test failure cases. 182 | for _, tt := range []struct { 183 | name string 184 | filtersets []string 185 | }{ 186 | {"Invalid operator in filterset", []string{`{a!=="1"}`}}, 187 | {"Empty filterset", []string{""}}, 188 | } { 189 | t.Run(tt.name, func(t *testing.T) { 190 | if _, err := parseFilters(tt.filtersets); err == nil { 191 | t.Fatalf("expected error, but got none") 192 | } 193 | }) 194 | } 195 | } 196 | 197 | func TestStartupUnhealthyEndpoint(t *testing.T) { 198 | // Tests that the selftest detects an unhealthy endpoint during the selftest. 
199 | if testing.Short() { 200 | t.Skip("skipping test in short mode.") 201 | } 202 | 203 | cmd := exec.Command( 204 | os.Args[0], 205 | append(e2eTestMainCommonFlags, 206 | "--prometheus.wal=testdata/wal", 207 | "--startup.timeout=5s", 208 | "--destination.timeout=1s", 209 | )...) 210 | 211 | cmd.Env = append(os.Environ(), "RUN_MAIN=1") 212 | var bout, berr bytes.Buffer 213 | cmd.Stdout = &bout 214 | cmd.Stderr = &berr 215 | err := cmd.Start() 216 | if err != nil { 217 | t.Errorf("execution error: %v", err) 218 | return 219 | } 220 | defer cmd.Wait() 221 | defer cmd.Process.Kill() 222 | 223 | ts := newTestServer(t, nil) 224 | defer ts.Stop() 225 | ts.runPrometheusService(promtest.Config{}) 226 | 227 | cmd.Wait() 228 | 229 | t.Logf("stdout: %v\n", bout.String()) 230 | t.Logf("stderr: %v\n", berr.String()) 231 | 232 | require.Contains(t, berr.String(), "selftest failed, not starting") 233 | require.Contains(t, berr.String(), "selftest recoverable error, still trying") 234 | } 235 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/testdata/certs/README.md: -------------------------------------------------------------------------------- 1 | These files were generated by github.com/square/certstrap v1.2. 
2 | 3 | ``` 4 | certstrap init --common-name root_ca 5 | certstrap request-cert --domain sidecar.test --ip 127.0.0.1 6 | certstrap sign --CA root_ca sidecar.test 7 | ``` 8 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/testdata/certs/root_ca.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIE5DCCAsygAwIBAgIBATANBgkqhkiG9w0BAQsFADASMRAwDgYDVQQDDAdyb290 3 | X2NhMB4XDTIwMTEyMDA2MzgwOFoXDTIyMDUyMDA2MzgwNFowEjEQMA4GA1UEAwwH 4 | cm9vdF9jYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAL4O2HEA1Zok 5 | xvJk+qPzfO6JDcygLAod8OIvlPf7YlsZWqbDOF9sipfgZQCvPA2iSBW8wSF74979 6 | fT3JY5dfdSkHojaIR6vRzWXffu/aELnRifP9DgdMb7B6WFpqKSqkdp5qm6LlOwbw 7 | bipg/j6/6C6pBvBBIGrw3Ewo6Q72SfD/W4jAcpON9oIzIhj3H0j3Y1Azb83B5guC 8 | Qt/TsetbWfodhlJNOqJ+0EvjUuGXHlBYFw4fmd09wdGt+Rmug/Z0faB/klxoe6p6 9 | ceOcWcRMMafApmixGa3Sem46MdO4eoeAOOy9bd50TgnTeTTQBGTqsKGzKgOVEIcZ 10 | GhlRy9gnTtqlrqUosHRNcI3YrIJyhSz+T1apK5vtpi203k5NEYrRebZI8tEeymFY 11 | EUE39rofKEbNjH4KGloYAfXIKeGYdNA9cbUcHkLTygK5SnUAzgjmFUrLG/144yCJ 12 | qS362wLiQhWQd7RgBrd+AaCmEKARt1yRNmf8UagVbP1bXSa5iwTgnFaTkIPgXGA4 13 | UWqp435XFfSMC/p450Saef3ne2qNEcbwX7PUaFLzHLBwSn0OdvQuFwqo2ElYkpEb 14 | WH+UOuzeLJ2XZpGABpzETKdgNFl/wRUF3pKC2VbPRt6k2dtP7o/kqlmztsoxUSDd 15 | YVxK8M6kkzcKbKYa6uOTD4yu+b8QZjYjAgMBAAGjRTBDMA4GA1UdDwEB/wQEAwIB 16 | BjASBgNVHRMBAf8ECDAGAQH/AgEAMB0GA1UdDgQWBBRiy9lF+tmr+FzJlhnyQGGZ 17 | xw5qtDANBgkqhkiG9w0BAQsFAAOCAgEAB+9YMM8dy7LCQQonB2R6eRsbBzXs2pzk 18 | SGem9ObMuiFUK85DsBlNK8vIPcjIzVThdEHNhdweyAA3Cxa8/xJ/XY7+MfgMgbIM 19 | oTjj6DqUzmZrV1GngvFF5QVdYOQQ2iXhBrYiw5a7gt73V8XFi4S42KtbxqqM853w 20 | pnnAPvAw/dfUhs8B404f3TtMPPwP2uKadp1fdOTih/NFnOrQX60V8daGacNkMBjf 21 | yhjXPHaiKYBINOJLVwoWLFdu0SfRXKiP2U/iLcC0sQYbIEnP0PdV/17i2WNCH7iP 22 | 53amfCcHtWtZLa5vJGWcY1ZPT/NuLiyNDgtrldbwIKjTVs3wElUMtz+H6qERdSNe 23 | kR7KyEiyGl9WSS14VWcwnLrJ+gsAQv3mA3BANJrYiunG5j/Tgg/RKtfDC8PyFJ0G 24 | 
dfbh73876L1hpSZ4hFTCwgHjsmIPFQ26olfzL2qkqPQzu7QEJBiLaxIwxS93k3R+ 25 | Lz7pjh9Z1IuqqsN4gD/6hXA3wYLLokVTM/IXNX6SdvwIghjef7E7SrMdsbYWgk7G 26 | FE9D61T3wvPX1dbTPJrCZgb/GvecKilZ7Mw363XoTwM57kS7GFE1BQYqvzerZ2JO 27 | E2MUDSbfjfAoLbyzjVGP0XwrHhMplx4F2gIp6nWYpkX96yQJqIbD7z+Cshmb6geT 28 | URhmbdzYHN0= 29 | -----END CERTIFICATE----- 30 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/testdata/certs/sidecar.test.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIERTCCAi2gAwIBAgIQQVHVGrOM6cUXOQevJfZyYzANBgkqhkiG9w0BAQsFADAS 3 | MRAwDgYDVQQDDAdyb290X2NhMB4XDTIwMTEyMDA2Mzk0N1oXDTIyMDUyMDA2Mzgw 4 | NFowFzEVMBMGA1UEAxMMc2lkZWNhci50ZXN0MIIBIjANBgkqhkiG9w0BAQEFAAOC 5 | AQ8AMIIBCgKCAQEApg7vpJAcPuvbfINw1vwIordXP1b+jDHlA/wjVio0MkocaXey 6 | dePwfiPy9jvBuwpp4nD5LLMe1CwtnRKedKpk1jMBS+7TWLfdy2m/LjKgS8U4rCsc 7 | RSfjrsrQ/pmDGZO5Zf64blYRIneA/2x01WgQLbakIQgRMgICzO/ZtGJjsxFTHcnK 8 | tmfxZcOCm6EOP97oHD4+WXglQbbWyVo9DR6oqocWiqDhAw4DlTaXaHhdNuGT3roh 9 | g8K1HQCcaLO3KqXIQSoByhIM/7OqAbwTNra9tijMiSeJI8ro+CoHLtvknz7JNymG 10 | GajWjuhW1QxF4AMJB0nFCFX9xCw5rC3CjXdZ8wIDAQABo4GRMIGOMA4GA1UdDwEB 11 | /wQEAwIDuDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwHQYDVR0OBBYE 12 | FAx3htsI2Q5fMLH6TyMv2NWMNK1wMB8GA1UdIwQYMBaAFGLL2UX62av4XMmWGfJA 13 | YZnHDmq0MB0GA1UdEQQWMBSCDHNpZGVjYXIudGVzdIcEfwAAATANBgkqhkiG9w0B 14 | AQsFAAOCAgEAFxQPWXUqgRJTBhjAZtMaJSO7D/LEXX15tAMKFlm0cpZ6GpaNgLkY 15 | 4NR8UXwurpoYT/aRp+jiFzz3c54EBZ4sPeVhc1Zw/FtkQ9KmUQTwGGUXrU7ke7L3 16 | VNWBhR3HhD/rvmbkOqWIGecUAxuKxlvCWxVLWoGygsRW3hzHU28Pcx8KBxeF2TNM 17 | pnJajmwHOJRgs+GJWuaHJZdK9IN9eUMc+rvTGG5KhJO5WfA7gb4OtZoVIYneHztg 18 | Qwkf44KOJmOqpPqT5RMXObKj1aEozzSVudfTJVjUmkuYJLiGUQCm80W+651FIpIc 19 | D7H+Qds5sTOtqIoxX704MzqIHCX/kzVNF61kRFjeqdxaPi0I34CAtBYilaGgrEa4 20 | QVs1qfKZ2O+eGTMt7sUU0xQ5Wsh8YEhb5cyjnWZIxHrlggP0GLRwIkD1EWOWYH6s 21 | zXKW3CIywHxMY370Kcs4Dk4OYzK1Qu6n6HXNngelHYk6/UrnUE1dR/8O4cYJN2q4 22 | 
nzSm2pip5fTMmpuZef7O7kM+X8wZBS5s+TReZXoJzdSTnssrfifkJi3Enpde4hcc 23 | KFkxw8cV6Bmo4obNb+Veg3vL6Cq4Qctsf8OzBWVq5+3vaf2zigwbYTrZ+ZmqKMHE 24 | DKzhnVcKgtVyCnkFvD5Ei/ROT7z9Ny6T57+f4S63MZik5YUAyHq5DvA= 25 | -----END CERTIFICATE----- 26 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/testdata/certs/sidecar.test.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEpAIBAAKCAQEApg7vpJAcPuvbfINw1vwIordXP1b+jDHlA/wjVio0MkocaXey 3 | dePwfiPy9jvBuwpp4nD5LLMe1CwtnRKedKpk1jMBS+7TWLfdy2m/LjKgS8U4rCsc 4 | RSfjrsrQ/pmDGZO5Zf64blYRIneA/2x01WgQLbakIQgRMgICzO/ZtGJjsxFTHcnK 5 | tmfxZcOCm6EOP97oHD4+WXglQbbWyVo9DR6oqocWiqDhAw4DlTaXaHhdNuGT3roh 6 | g8K1HQCcaLO3KqXIQSoByhIM/7OqAbwTNra9tijMiSeJI8ro+CoHLtvknz7JNymG 7 | GajWjuhW1QxF4AMJB0nFCFX9xCw5rC3CjXdZ8wIDAQABAoIBAEN9lpB/sbsxCQfW 8 | NCpsTOY1NdSceuBn9vhjpckDZit6S4niCDo55iieW8UQDHEEjnxmBh/QvRrmybMp 9 | OhWYm1sjZ8eAzY0lTZO66uTP1q50/c+bxeWljLgh3kdcBRiEHboCiNrvSA+GGnu1 10 | jNafcNPjg1mZecOlZZX9dbIGuxavsARtMAqmQpKqq5Ns0tWifmUbMeVekgoldQzz 11 | E4yywkZh3Ti4iKVeXKcDqT1kPQIm/5DmY2IwlAUUNuX78Qto4dv6t2ONo4BnIHB6 12 | AG2Gs6HpXlTpiPhWVUg4ZjTYuchVP+5Ln08xq78LyVRCvjC/0Wpkcl4HaiOh5OaI 13 | ZC6H84ECgYEA2l3E9LvC/csxapOJOmTb97WSJvZKvZgIvpa4OxmHQAktmrUl9hfR 14 | nMn6vMWpNXOFfFcT4NzZeFJqcpPFU1tgw+TkmL627EMba7wb7zbDGEjQEI87cJmQ 15 | 3Fgs/hPNdi8nPKm9PjLvaKWlMTfYG3AcAJccGsGhVT8/PsyE0HuDZ4MCgYEAwq1c 16 | o0+pKy9qKs+JrXbtrdukhwYjTL62wI9vmjsw2XP76hEqyWprPuAk+jBFCmg/NCv6 17 | yeRsFJASfgDiU+Zk/YwqxknHjLT1HcpZljjbh9G2/0UoqnTIFcngXw4Sjt/lpVpz 18 | QyVDfiC0DdEQd0q+/eIb72AF91bK1h3tWaYxSNECgYEAyyHxhwxnXro3lQH75SNP 19 | 8MWqWK5CZs5YTBte9DNSOsKO9tzOnHgZ2PWXVyW0u3FkWn+SkVpAvwpD1aQbKujV 20 | QkXaVTEMbwoaJwVNIyFZKW0KiG0FiUCBnkg0SoLXXbqNkIQB9CtjxIBbBEcDtLp8 21 | ghwP2+AGGYwFZIDSiDJt0csCgYATxkpPqGrVoSZWnOguP5NbAqGOuZfiVsj3fFpo 22 | vzRyEi73krDW7sFgDKKU60xIB1jMMt9xnItB93R5ahuTTCbb/l3l8F6ds0/mq8Q3 23 | 
1yStgiblfR/pWJ1uYSgyX5qQ+rs39Z0Zp/JYIHhJov/aLjSCU3Jk+UsB2cYWw8l1 24 | PsVo0QKBgQCrs0UUNH3zvFVwXhnWfuKbXDcEErqpiNlIidUL0YGPEl+6iYUe1XJV 25 | f1tjf4yhnY2ORS+Umd0SXYNrk+AKEoThUD10czCdBfp5ce5gbgDPLxzBf3p6Aahn 26 | 9CSXPnbxMd5v3Fp5On0mJ5PaAMuyoUnI+yMJMaP+wHS0gOJIXw7+zg== 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/testdata/wal/000000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightstep/opentelemetry-prometheus-sidecar/a9be6a6ed05478cac75a89b6a463a029ae7900c1/cmd/opentelemetry-prometheus-sidecar/testdata/wal/000000 -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/uname_default.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | // +build !linux 15 | 16 | package main 17 | 18 | import "runtime" 19 | 20 | // Uname for any platform other than linux. 
21 | func Uname() string { 22 | return "(" + runtime.GOOS + ")" 23 | } 24 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/uname_linux.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package main 15 | 16 | import ( 17 | "log" 18 | "syscall" 19 | ) 20 | 21 | // Uname returns the uname of the host machine. 22 | func Uname() string { 23 | buf := syscall.Utsname{} 24 | err := syscall.Uname(&buf) 25 | if err != nil { 26 | log.Fatal("Error!") 27 | } 28 | 29 | str := "(" + charsToString(buf.Sysname[:]) 30 | str += " " + charsToString(buf.Release[:]) 31 | str += " " + charsToString(buf.Version[:]) 32 | str += " " + charsToString(buf.Machine[:]) 33 | str += " " + charsToString(buf.Nodename[:]) 34 | str += " " + charsToString(buf.Domainname[:]) + ")" 35 | return str 36 | } 37 | -------------------------------------------------------------------------------- /cmd/opentelemetry-prometheus-sidecar/uname_linux_int8.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 
// charsToString converts a NUL-terminated array of signed chars into a Go
// string. Bytes are taken up to, but not including, the first zero; when
// there is no zero the whole array is used.
func charsToString(ca []int8) string {
	// Locate the terminator first so the output is sized exactly.
	end := len(ca)
	for i, ch := range ca {
		if ch == 0 {
			end = i
			break
		}
	}

	out := make([]byte, end)
	for i, ch := range ca[:end] {
		out[i] = byte(ch)
	}
	return string(out)
}
// charsToString converts a NUL-terminated array of unsigned chars into a Go
// string. Bytes are taken up to, but not including, the first zero; when
// there is no zero the whole array is used.
func charsToString(ca []uint8) string {
	for i := range ca {
		if ca[i] == 0 {
			// Everything before the terminator is the string.
			return string(ca[:i])
		}
	}
	return string(ca)
}
// TestValidationErrorReporting is an end-to-end test of validation
// error reporting.  It writes a small WAL, starts a test OTLP server
// configured with gRPC trailers describing dropped points, dropped
// metrics and invalid metric names, and then re-executes the test
// binary with RUN_MAIN=1 so that it runs as a sidecar reading that
// WAL.  The test passes when the metrics received by the test server
// and the sidecar's stderr both reflect the contents of the trailers.
//
// The test is skipped in -short mode.
func TestValidationErrorReporting(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	// Create a WAL with 3 series, 5 points.  Two of them are
	// counters, so after resets we have 3 series, 5 points.
	dir, err := ioutil.TempDir("", "test_validation")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)

	w, err := wal.NewSize(nil, nil, dir, 1<<16, false)
	if err != nil {
		t.Fatal(err)
	}
	// Safety net for early exits; the WAL is closed explicitly
	// below once the records have been written.
	defer w.Close()

	var encoder record.Encoder

	// Sample timestamps are in milliseconds.
	ts := time.Now().Unix() * 1000

	require.NoError(t, w.Log(
		encoder.Series([]record.RefSeries{
			{
				Ref: 1,
				Labels: labels.Labels{
					{Name: "job", Value: "job1"},
					{Name: "instance", Value: "inst1"},
					{Name: "__name__", Value: "counter"},
				},
			},
			{
				Ref: 2,
				Labels: labels.Labels{
					{Name: "job", Value: "job1"},
					{Name: "instance", Value: "inst1"},
					{Name: "__name__", Value: "gauge"},
				},
			},
			{
				Ref: 3,
				Labels: labels.Labels{
					{Name: "job", Value: "job1"},
					{Name: "instance", Value: "inst1"},
					{Name: "__name__", Value: "correct"},
				},
			},
		}, nil),
		encoder.Samples([]record.RefSample{
			// Note the names above do not correlate with
			// type--there are two counters according to
			// the metadata returned (see below) and they
			// each have a first cumulative report of 100
			// (with different reset values).
			{Ref: 1, T: ts, V: 100},
			{Ref: 2, T: ts, V: 1000},
			{Ref: 3, T: ts, V: 10000},
			{Ref: 2, T: ts + 1000, V: 1100},
			{Ref: 3, T: ts + 1000, V: 10100},
		}, nil),
	))

	require.NoError(t, w.Close())

	// Create an OTLP server that returns the following gRPC Trailers.
	ms := newTestServer(t, grpcmeta.MD{
		"otlp-points-dropped":  {"2"},
		"otlp-metrics-dropped": {"1"},
		"otlp-invalid-reason1": {"count"},
		"otlp-invalid-reason2": {"gauge", "mistake"},
	})
	defer ms.Stop()
	ms.runDiagnosticsService(nil)
	ms.runPrometheusService(promtest.Config{
		// Conflicting types for "counter" and "gauge": the
		// series named "counter" is declared a gauge and the
		// series named "gauge" is declared a counter.
		Metadata: promtest.MetadataMap{
			"job1/inst1/counter": &config.MetadataEntry{
				Metric:     "counter",
				MetricType: textparse.MetricTypeGauge,
			},
			"job1/inst1/gauge": &config.MetadataEntry{
				Metric:     "gauge",
				MetricType: textparse.MetricTypeCounter,
			},
			"job1/inst1/correct": &config.MetadataEntry{
				Metric:     "correct",
				MetricType: textparse.MetricTypeCounter,
			},
		},
	})

	// Start a sidecar to read the WAL and report diagnostics,
	// including the invalid metrics.
	cmd := exec.Command(
		os.Args[0],
		append(e2eTestMainSupervisorFlags,
			// Note: the next two flags ensure both the
			// destination and diagnostics output go to
			// the same place.
			"--destination.endpoint=http://127.0.0.1:19000",
			"--diagnostics.endpoint=http://127.0.0.1:19000",
			"--prometheus.wal", dir,
			"--startup.timeout=15s",
			"--healthcheck.period=5s",
			"--destination.timeout=5s",
		)...)

	cmd.Env = append(os.Environ(), "RUN_MAIN=1")

	// Capture the subprocess output; stderr is inspected at the
	// end of the test for the expected log lines.
	var bout, berr bytes.Buffer
	cmd.Stdout = &bout
	cmd.Stderr = &berr
	if err = cmd.Start(); err != nil {
		t.Errorf("execution error: %v", err)
		return
	}

	// invalid records every "<reason>/<metric name>" pair seen on
	// the failing-metrics diagnostic metric.
	invalid := map[string]bool{}
	timer := time.NewTimer(time.Second * 10)
	defer timer.Stop()

	// Keep reading exported metrics until 5 points from the test
	// series have been seen, both drop counters are non-zero and
	// 3 failing-metric entries have been recorded, or until the
	// 10 second timer fires.
	var droppedPointsFound, droppedSeriesFound int64
	var got = 0
outer:
	for got < 5 || droppedPointsFound == 0 || droppedSeriesFound == 0 || len(invalid) < 3 {
		var data *otlpmetrics.ResourceMetrics
		select {
		case data = <-ms.metrics:
		case <-timer.C:
			t.Error("test timeout: ", got, droppedPointsFound, droppedSeriesFound)
			break outer
		}

		var vs otlptest.VisitorState
		vs.Visit(context.Background(), func(
			_ *otlpresource.Resource,
			name string,
			kind config.Kind,
			_ bool,
			point interface{},
		) error {
			switch name {
			case "counter", "gauge", "correct":
				// Points from the three WAL series: each value
				// must be either 0 or (approximately) 100.
				num := point.(*otlpmetrics.NumberDataPoint).Value
				val := num.(*otlpmetrics.NumberDataPoint_AsDouble).AsDouble
				if val == 0 {
					// OK!  (presumably the first point after a
					// counter reset -- TODO confirm)
				} else {
					require.InEpsilon(t, 100, val, 0.01)
				}
				got++
			case config.DroppedPointsMetric:
				num := point.(*otlpmetrics.NumberDataPoint).Value
				droppedPointsFound = num.(*otlpmetrics.NumberDataPoint_AsInt).AsInt
			case config.DroppedSeriesMetric:
				num := point.(*otlpmetrics.NumberDataPoint).Value
				droppedSeriesFound = num.(*otlpmetrics.NumberDataPoint_AsInt).AsInt
			case config.FailingMetricsMetric:
				attrs := point.(*otlpmetrics.NumberDataPoint).Attributes

				// Extract the reason and metric name attributes
				// attached to this failing-metric point.
				var reason, mname string
				for _, attr := range attrs {
					switch attribute.Key(attr.Key) {
					case common.ReasonKey:
						reason = attr.Value.Value.(*otlpcommon.AnyValue_StringValue).StringValue
					case common.MetricNameKey:
						mname = attr.Value.Value.(*otlpcommon.AnyValue_StringValue).StringValue
					}
				}
				invalid[reason+"/"+mname] = true
			}
			return nil
		}, data)
	}

	// Stop the sidecar; errors are ignored because the output
	// collected so far is all that the assertions below need.
	_ = cmd.Process.Signal(os.Interrupt)
	_ = cmd.Wait()

	t.Logf("stdout: %v\n", bout.String())
	t.Logf("stderr: %v\n", berr.String())

	// We saw the correct metrics.
	require.EqualValues(t, map[string]bool{
		"reason1/count":   true,
		"reason2/gauge":   true,
		"reason2/mistake": true,
	}, invalid)

	// Correct drop summary:
	require.Equal(t, int64(2), droppedPointsFound) // from server response
	require.Equal(t, int64(1), droppedSeriesFound) // from server response

	for _, expect := range []string{
		// We didn't start the trace service but received data.
		`unknown service opentelemetry.proto.collector.trace.v1.TraceService`,
		// We log the two validation errors.
		`reason=reason1 names=[count]`,
		`reason=reason2 names="[gauge mistake]"`,
	} {
		require.Contains(t, berr.String(), expect)
	}
}
14 | 15 | package common 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "sort" 21 | "strings" 22 | "sync" 23 | "time" 24 | 25 | "github.com/go-kit/kit/log" 26 | "github.com/go-kit/kit/log/level" 27 | sidecar "github.com/lightstep/opentelemetry-prometheus-sidecar" 28 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 29 | "go.opentelemetry.io/otel/metric" 30 | ) 31 | 32 | type ( 33 | // FailingReporter is an interface for FailingSet 34 | FailingReporter interface { 35 | Set(reason, metricName string) 36 | } 37 | 38 | // FailingSet reports a set of gauges to describe failing data points. 39 | FailingSet struct { 40 | observer metric.Int64GaugeObserver 41 | logger log.Logger 42 | 43 | lock sync.Mutex 44 | short stateMap 45 | long stateMap 46 | 47 | lastSummary time.Time 48 | } 49 | 50 | stateMap map[string]nameMap 51 | nameMap map[string]struct{} 52 | ) 53 | 54 | const ( 55 | failingConstant = 1 56 | 57 | failingMetricSummaryInterval = time.Minute * 5 58 | 59 | // Limits the number of metrics names that are reported as metric labels, 60 | // this ensures that we don't explode the label cardinality of the failing metric. 
61 | failingMetricMaxReportedMetrics = 50 62 | ) 63 | 64 | func NewFailingSet(logger log.Logger) *FailingSet { 65 | i := &FailingSet{ 66 | short: stateMap{}, 67 | long: stateMap{}, 68 | logger: logger, 69 | } 70 | i.observer = sidecar.OTelMeterMust.NewInt64GaugeObserver( 71 | config.FailingMetricsMetric, 72 | i.observe, 73 | metric.WithDescription("labeled examples of failing metric data"), 74 | ) 75 | return i 76 | 77 | } 78 | 79 | func (i *FailingSet) Set(reason, metricName string) { 80 | i.lock.Lock() 81 | defer i.lock.Unlock() 82 | 83 | i.short.set(reason, metricName) 84 | i.long.set(reason, metricName) 85 | } 86 | 87 | func (s stateMap) set(reason, metricName string) { 88 | if s[reason] == nil { 89 | s[reason] = nameMap{} 90 | } 91 | s[reason][metricName] = struct{}{} 92 | } 93 | 94 | func (i *FailingSet) observe(_ context.Context, result metric.Int64ObserverResult) { 95 | summary := i.observeLocked(result) 96 | 97 | if summary == nil { 98 | return 99 | } 100 | 101 | for reason, nm := range summary { 102 | var names []string 103 | for name := range nm { 104 | names = append(names, name) 105 | } 106 | sort.Strings(names) 107 | count := len(names) 108 | if count > failingMetricMaxReportedMetrics { 109 | names = names[:failingMetricMaxReportedMetrics] 110 | } 111 | level.Warn(i.logger).Log( 112 | "reason", strings.ReplaceAll(reason, "-", " "), 113 | "names", fmt.Sprint(names), 114 | "metrics_failed", count, 115 | ) 116 | } 117 | } 118 | 119 | func (i *FailingSet) observeLocked(result metric.Int64ObserverResult) stateMap { 120 | i.lock.Lock() 121 | defer i.lock.Unlock() 122 | 123 | for reason, names := range i.short { 124 | var observedCount int 125 | for metricName := range names { 126 | if observedCount >= failingMetricMaxReportedMetrics { 127 | break 128 | } 129 | observedCount++ 130 | result.Observe(failingConstant, 131 | ReasonKey.String(reason), 132 | MetricNameKey.String(metricName), 133 | ) 134 | } 135 | } 136 | i.short = stateMap{} 137 | 138 | if 
len(i.long) == 0 { 139 | return nil 140 | } 141 | 142 | now := time.Now() 143 | if now.Sub(i.lastSummary) < failingMetricSummaryInterval { 144 | return nil 145 | } 146 | 147 | summary := i.long 148 | i.long = stateMap{} 149 | i.lastSummary = now 150 | return summary 151 | } 152 | -------------------------------------------------------------------------------- /common/failingset_test.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "github.com/go-kit/kit/log" 9 | "go.opentelemetry.io/otel/metric" 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | func TestFailingSetMaxMetricsLogged(t *testing.T) { 15 | buf := bytes.NewBuffer(make([]byte, 0, 2048)) 16 | fs := NewFailingSet(log.NewJSONLogger(buf)) 17 | 18 | for i := 0; i < 100; i++ { 19 | fs.Set("metadata_not_found", fmt.Sprintf("%d", i)) 20 | } 21 | 22 | // force the short map to be nil, so we don't record any metric. 
// TargetMetadataAPIResponse is the JSON envelope decoded for a list of
// per-target metadata entries (presumably the response of the
// Prometheus target-metadata HTTP API -- confirm against the caller).
type TargetMetadataAPIResponse struct {
	Status    string              `json:"status"`
	Data      []APITargetMetadata `json:"data,omitempty"`
	Error     string              `json:"error,omitempty"`
	ErrorType string              `json:"errorType,omitempty"`
	Warnings  []string            `json:"warnings,omitempty"`
}

// APITargetMetadata is one entry of TargetMetadataAPIResponse.Data: the
// name, help text and type of a metric.
type APITargetMetadata struct {
	// We do not decode the target information.
	Metric string               `json:"metric"`
	Help   string               `json:"help"`
	Type   textparse.MetricType `json:"type"`
}

// MetadataAPIResponse is the JSON envelope decoded for metadata keyed
// by a string (presumably the metric name -- confirm against the
// caller), with a list of entries per key.
type MetadataAPIResponse struct {
	Status    string                   `json:"status"`
	Data      map[string][]APIMetadata `json:"data,omitempty"`
	Error     string                   `json:"error,omitempty"`
	ErrorType string                   `json:"errorType,omitempty"`
	Warnings  []string                 `json:"warnings,omitempty"`
}

// APIMetadata is one entry of MetadataAPIResponse.Data: the help text
// and type of a metric.
type APIMetadata struct {
	Help string               `json:"help"`
	Type textparse.MetricType `json:"type"`
}

// ConfigAPIResponse is the JSON envelope decoded for a configuration
// document carried in Data.
type ConfigAPIResponse struct {
	Status    string    `json:"status"`
	Data      APIConfig `json:"data,omitempty"`
	ErrorType string    `json:"errorType,omitempty"`
	Error     string    `json:"error,omitempty"`
	Warnings  []string  `json:"warnings,omitempty"`
}

// APIConfig holds the configuration as a single YAML string.
type APIConfig struct {
	YAML string `json:"yaml"`
}
"public", 35 | // "service.name": "demo" 36 | // }, 37 | // "timeout": "2m0s", 38 | // "compression": "snappy" 39 | // }, 40 | // "prometheus": { 41 | // "endpoint": "http://127.0.0.1:19090", 42 | // "wal": "/volume/wal", 43 | // "max_point_age": "72h0m0s", 44 | // "health_check_request_timeout": "5s" 45 | // }, 46 | // "opentelemetry": { 47 | // "max_bytes_per_request": 1500, 48 | // "metrics_prefix": "prefix.", 49 | // "min_shards": 100, 50 | // "max_shards": 200, 51 | // "queue_size": 100001 52 | // }, 53 | // "admin": { 54 | // "listen_ip": "0.0.0.0", 55 | // "port": 10000, 56 | // "health_check_period": "20s", 57 | // "health_check_threshold_ratio": 0.5 58 | // }, 59 | // "security": { 60 | // "root_certificates": [ 61 | // "/certs/root1.crt", 62 | // "/certs/root2.crt" 63 | // ] 64 | // }, 65 | // "diagnostics": { 66 | // "endpoint": "https://otlp.io:443", 67 | // "headers": { 68 | // "access-token": "wwxxyyzz...aabbccdd" 69 | // }, 70 | // "attributes": { 71 | // "environment": "internal" 72 | // }, 73 | // "timeout": "1m0s", 74 | // "compression": "snappy" 75 | // }, 76 | // "startup_timeout": "5m0s", 77 | // "filters": [ 78 | // "metric{label=value}", 79 | // "other{l1=v1,l2=v2}" 80 | // ], 81 | // "metric_renames": [ 82 | // { 83 | // "from": "old_metric", 84 | // "to": "new_metric" 85 | // }, 86 | // { 87 | // "from": "mistake", 88 | // "to": "correct" 89 | // } 90 | // ], 91 | // "static_metadata": [ 92 | // { 93 | // "metric": "network_bps", 94 | // "type": "counter", 95 | // "value_type": "int64", 96 | // "help": "Number of bits transferred by this process." 
97 | // } 98 | // ], 99 | // "log": { 100 | // "level": "debug", 101 | // "format": "json", 102 | // "verbose": 1 103 | // }, 104 | // "leader_election": { 105 | // "enabled": true, 106 | // "k8s": { 107 | // "namespace": "tools" 108 | // } 109 | // }, 110 | // "disable_supervisor": false, 111 | // "disable_diagnostics": false 112 | // } 113 | } 114 | -------------------------------------------------------------------------------- /config/sidecar.example.yaml: -------------------------------------------------------------------------------- 1 | # Destination parameters, where the sidecar will send metrics: 2 | destination: 3 | 4 | # Endpoint should be a HTTP or HTTPS address that accepts 5 | # OpenTelemetry Metrics v0.5 (or later) over gRPC: 6 | endpoint: https://otlp.io:443 7 | 8 | # Headers are included as gRPC metadata, represented as HTTP 9 | # request headers when exporting metric data: 10 | headers: 11 | Access-Token: aabbccdd...wwxxyyzz 12 | 13 | # Attributes are applied as OpenTelemetry resources on exporter 14 | # metrics, which are typically treated like constant labels in 15 | # downstream systems: 16 | attributes: 17 | # See the well-known semantic conventions for system resources: 18 | # https://github.com/open-telemetry/opentelemetry-specification/tree/master/specification/resource/semantic_conventions 19 | service.name: demo 20 | environment: public 21 | 22 | timeout: 2m 23 | 24 | # Compression format to be used, if any. Defaults to snappy: 25 | compression: snappy 26 | 27 | # Prometheus configuration: 28 | prometheus: 29 | # The primary HTTP endpoint: 30 | endpoint: http://127.0.0.1:19090 31 | 32 | # Location of the write-ahead-log directory. 33 | wal: /volume/wal 34 | 35 | # Skip points older than this 36 | max_point_age: 72h 37 | 38 | # OpenTelemetry settings: 39 | opentelemetry: 40 | # Send at most this number of bytes per request 41 | max_bytes_per_request: 1500 42 | 43 | # Min number of shards, i.e. 
amount of concurrency 44 | min_shards: 100 45 | 46 | # Max number of shards, i.e. amount of concurrency 47 | max_shards: 200 48 | 49 | # Metrics prefix is prepended to all exported metric names: 50 | metrics_prefix: prefix. 51 | 52 | # Outbound queue size limit 53 | queue_size: 100001 54 | 55 | # Administrative settings: 56 | admin: 57 | # Listen address of the sidecar's HTTP server (e.g., for health checks) 58 | listen_ip: 0.0.0.0 59 | port: 10000 60 | 61 | # Controls how often the health check status is updated. This must 62 | # be raised for Prometheus configurations that scrape infrequently. 63 | # Default: 1m 64 | health_check_period: 20s 65 | # Controls the threshold ratio used to determine if the check 66 | # should pass or fail based on the number of successes or failures 67 | # when sending metrics via OTLP 68 | health_check_threshold_ratio: 0.5 69 | 70 | # Security settings: 71 | security: 72 | # Root certificates used in TLS settings: 73 | root_certificates: 74 | - /certs/root1.crt 75 | - /certs/root2.crt 76 | 77 | # Diagnostics parameters, where the sidecar will send its own diagnostic 78 | # data. This is structurally the same as destination, above. If this is 79 | # not configured and disable_diagnostics is also not set, this section 80 | # will be auto-configured to match the primary destination. 81 | diagnostics: 82 | endpoint: https://otlp.io:443 83 | headers: 84 | Access-Token: wwxxyyzz...aabbccdd 85 | attributes: 86 | environment: internal 87 | 88 | # Set this to prevent auto-configuring diagnostics. 89 | disable_diagnostics: false 90 | 91 | # Filters expressed as Prometheus series expressions.
If any of these 92 | # are configured, at least one must match for the timeseries to be 93 | # exported: 94 | filters: 95 | - metric{label=value} 96 | - other{l1=v1,l2=v2} 97 | 98 | # Metric renamings: 99 | metric_renames: 100 | - from: old_metric 101 | to: new_metric 102 | - from: mistake 103 | to: correct 104 | 105 | # Static metadata configures the kind of metric (counter, gauge, 106 | # histogram, summary), the value type (int64, double), and the 107 | # help string used for exporting metrics. These settings override 108 | # metadata discovered via Prometheus: 109 | static_metadata: 110 | - metric: network_bps 111 | type: counter 112 | value_type: int64 113 | help: Number of bits transferred by this process. 114 | 115 | # Startup timeout determines how long to wait for the endpoint to 116 | # become available once before entering the initial run state. 117 | startup_timeout: 300s 118 | 119 | # Control the format and level of console-logging output: 120 | log: 121 | level: debug 122 | format: json 123 | verbose: 1 124 | 125 | # Control whether to use leadership election, which prevents bulk 126 | # duplication of data in a Prometheus HA environment.
127 | leader_election: 128 | enabled: true 129 | 130 | # k8s leader election settings: 131 | k8s: 132 | namespace: tools 133 | 134 | -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | # Design 2 | 3 | _Javier Kohen (jkohen), Fabian Reinartz (fabxc)_ 4 | 5 | _2018-04-05_ 6 | 7 | [See the original design document here.](https://github.com/Stackdriver/stackdriver-prometheus-sidecar/blob/master/docs/design.md) 8 | -------------------------------------------------------------------------------- /docs/img/opentelemetry-prometheus-sidecar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightstep/opentelemetry-prometheus-sidecar/a9be6a6ed05478cac75a89b6a463a029ae7900c1/docs/img/opentelemetry-prometheus-sidecar.png -------------------------------------------------------------------------------- /docs/operations.md: -------------------------------------------------------------------------------- 1 | # Operations 2 | 3 | ## Prerequisites 4 | 5 | The sidecar exposes a variety of metrics about its internal state that are 6 | essential during troubleshooting. Ensure that its associated Prometheus server is 7 | configured to scrape the sidecar's `/metrics` endpoint. 8 | 9 | ## Verify that the sidecar is running 10 | 11 | Verify that the sidecar is running alongside your Prometheus server: 12 | 13 | ``` 14 | kubectl -n $NAMESPACE get pods 15 | ``` 16 | 17 | You should see the following line: 18 | 19 | ``` 20 | NAME READY STATUS RESTARTS AGE 21 | ... 22 | prometheus-k8s-85cf598f75-64fjk 2/2 Running 0 24m 23 | ... 24 | ``` 25 | 26 | If it shows only one container (Ready: `1/1`), go back to the setup 27 | instructions and verify that you've correctly configured the Prometheus 28 | deployment/stateful set.
29 | 30 | If it shows that not all containers are ready, check the logs of the Prometheus and 31 | sidecar containers for any error messages: 32 | 33 | ``` 34 | kubectl -n $NAMESPACE logs $POD_NAME prometheus 35 | kubectl -n $NAMESPACE logs $POD_NAME sidecar 36 | ``` 37 | 38 | ## Verify that the sidecar operates correctly 39 | 40 | ### Does the sidecar process Prometheus's data? 41 | 42 | The sidecar follows the write-ahead-log of the Prometheus storage and converts 43 | Prometheus data into OpenTelemetry metrics time series. 44 | 45 | Go to the Prometheus UI and run the following query: 46 | 47 | ``` 48 | rate(prometheus_sidecar_samples_processed[5m]) 49 | ``` 50 | 51 | It should produce a value greater than 0, which indicates how many Prometheus 52 | samples the sidecar is continuously processing. 53 | 54 | If it is zero, go to the `/targets` page in the UI and verify that Prometheus 55 | itself is actually ingesting data. If no targets are visible, consult the 56 | [Prometheus documentation][prom-getting-started] on how to configure Prometheus correctly. 57 | 58 | ### Are samples being sent to OpenTelemetry? 59 | 60 | Run the following query to verify that the sidecar produces OpenTelemetry data 61 | from the Prometheus samples: 62 | 63 | ``` 64 | rate(prometheus_sidecar_samples_produced[5m]) 65 | ``` 66 | 67 | The number is generally expected to be lower than the number of processed samples 68 | since multiple Prometheus samples (e.g. histogram buckets) may be consolidated 69 | into a single complex OpenTelemetry sample. 70 | 71 | If it is zero, check the sidecar's logs for reported errors. 72 | 73 | Verify that the produced samples are successfully being sent to OpenTelemetry: 74 | 75 | ``` 76 | rate(prometheus_remote_storage_succeeded_samples_total[5m]) 77 | ``` 78 | 79 | The number should generally match the number of produced samples from the previous 80 | metric. If it is notably lower, check the sidecar's logs for hints that OpenTelemetry 81 | rejected some samples.
82 | If no samples were sent successfully at all, the logs might indicate a broader 83 | error such as invalid credentials. 84 | 85 | ### Can the sidecar keep up with Prometheus? 86 | 87 | The number of samples produced by Prometheus and processed by the sidecar should 88 | be virtually identical. The following two queries should report nearly the same 89 | number: 90 | 91 | ``` 92 | rate(prometheus_sidecar_samples_processed[5m]) 93 | rate(prometheus_tsdb_head_samples_appended_total[5m]) 94 | ``` 95 | 96 | If the sidecar's processed samples are notably lower, Prometheus may be producing 97 | more data than the sidecar can process and/or write to OpenTelemetry. 98 | Check the sidecar for logs that indicate rate limiting by the OpenTelemetry API. 99 | You can further verify backpressure with the following query: 100 | 101 | ``` 102 | prometheus_remote_storage_queue_length{queue="https://monitoring.googleapis.com:443/"} / 103 | prometheus_remote_storage_queue_capacity{queue="https://monitoring.googleapis.com:443/"} 104 | ``` 105 | 106 | If the queue fullness has an upward trend or has already reached 1, you may 107 | consider [filtering][filter-docs] the amount of data that is forwarded to 108 | OpenTelemetry to exclude particularly noisy or high-volume metrics. 109 | Increasing the overall scrape interval of Prometheus (so that it scrapes less often) is another option.
110 | 111 | 112 | [prom-getting-started]: https://prometheus.io/docs/prometheus/latest/getting_started/ 113 | [filter-docs]: ../README.md#filters 114 | 115 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/lightstep/opentelemetry-prometheus-sidecar 2 | 3 | require ( 4 | github.com/d4l3k/messagediff v1.2.1 // indirect 5 | github.com/ghodss/yaml v1.0.0 6 | github.com/go-kit/kit v0.10.0 7 | github.com/go-logfmt/logfmt v0.5.0 8 | github.com/golang/protobuf v1.5.2 9 | github.com/golang/snappy v0.0.2 10 | github.com/google/go-cmp v0.5.6 11 | github.com/google/uuid v1.1.2 12 | github.com/hashicorp/go-version v1.2.0 13 | github.com/oklog/run v1.1.0 14 | github.com/pkg/errors v0.9.1 15 | github.com/prometheus/client_model v0.2.0 16 | github.com/prometheus/common v0.15.0 17 | github.com/prometheus/prom2json v1.3.0 18 | // Prometheus server does not follow go modules conventions: 19 | // 20 | // Release v2.24.1 / 2021-01-19 has git-sha 0a7fdd3b76960808c3a91d92267c3d815c1bc354 21 | // 22 | // Maps to: 23 | // 24 | // github.com/prometheus/prometheus v1.8.2-0.20210119214810-e4487274853c 25 | // 26 | // Computed using: 27 | // 28 | // go get github.com/prometheus/prometheus@e4487274853c587717006eeda8804e597d120340 29 | // 30 | // see https://github.com/prometheus/prometheus/issues/7663. 
31 | github.com/prometheus/prometheus v1.8.2-0.20210119214810-e4487274853c 32 | github.com/stretchr/testify v1.7.0 33 | go.opentelemetry.io/contrib/instrumentation/host v0.23.0 34 | go.opentelemetry.io/contrib/instrumentation/runtime v0.23.0 35 | go.opentelemetry.io/contrib/propagators/b3 v0.23.0 36 | go.opentelemetry.io/otel v1.1.0 37 | go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.24.0 38 | go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.24.0 39 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.0.0 40 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.0.0 41 | go.opentelemetry.io/otel/metric v0.24.0 42 | go.opentelemetry.io/otel/sdk v1.0.1 43 | go.opentelemetry.io/otel/sdk/export/metric v0.24.0 44 | go.opentelemetry.io/otel/sdk/metric v0.24.0 45 | go.opentelemetry.io/otel/trace v1.1.0 46 | go.opentelemetry.io/proto/otlp v0.9.0 47 | golang.org/x/net v0.0.0-20201224014010-6772e930b67b 48 | google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e 49 | google.golang.org/grpc v1.41.0 50 | google.golang.org/protobuf v1.27.1 51 | gopkg.in/alecthomas/kingpin.v2 v2.2.6 52 | gopkg.in/d4l3k/messagediff.v1 v1.2.1 53 | gopkg.in/yaml.v2 v2.4.0 54 | k8s.io/apimachinery v0.20.1 55 | k8s.io/client-go v0.20.1 56 | ) 57 | 58 | go 1.15 59 | -------------------------------------------------------------------------------- /health/health_test.go: -------------------------------------------------------------------------------- 1 | // Copyright The OpenTelemetry Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// tester bundles the objects a health test needs: the testing.T, the
// Checker under test, the metrics controller feeding it, the two
// counters the tests increment, and an HTTP server exposing the
// checker's Alive handler.
type tester struct {
	*testing.T
	*Checker
	*controller.Controller
	producedInst metric.Int64Counter
	outcomeInst  metric.Int64Counter
	aliveServer  *httptest.Server
}

// controllerGetter is a test stub that hands out a fixed controller.
type controllerGetter struct {
	controller *controller.Controller
}

// GetController returns the controller the getter was built with.
func (getter *controllerGetter) GetController() *controller.Controller {
	return getter.controller
}

// promGlobalConfigGetter is a test stub that supplies a Prometheus
// global config containing only external labels.
type promGlobalConfigGetter struct {
	externalLabels labels.Labels
}

// GetGlobalConfig returns a GlobalConfig whose ExternalLabels are the
// getter's labels; all other fields are left at their zero values.
func (getter *promGlobalConfigGetter) GetGlobalConfig() promconfig.GlobalConfig {
	return promconfig.GlobalConfig{
		ExternalLabels: getter.externalLabels,
	}
}

// alwaysLeaderCandidate is a test stub for leader election.
type alwaysLeaderCandidate struct {
}

// Start does nothing and never fails.
func (n alwaysLeaderCandidate) Start(ctx context.Context) error {
	return nil
}
func (n alwaysLeaderCandidate) IsLeader() bool { 73 | return true 74 | } 75 | 76 | var _ leader.Candidate = (*alwaysLeaderCandidate)(nil) 77 | 78 | func testController(t *testing.T) *tester { 79 | cont := telemetry.InternalOnly().Controller 80 | produced := metric.Must(cont.Meter("test")).NewInt64Counter(config.ProducedPointsMetric) 81 | outcome := metric.Must(cont.Meter("test")).NewInt64Counter(config.OutcomeMetric) 82 | configGetter := &promGlobalConfigGetter{externalLabels: labels.FromStrings("test_name", "test_value")} 83 | 84 | checker := NewChecker(&controllerGetter{cont}, configGetter, 0 /* uncached */, telemetry.DefaultLogger(), config.DefaultHealthCheckThresholdRatio, &alwaysLeaderCandidate{}) 85 | 86 | aliveServer := httptest.NewServer(checker.Alive()) 87 | 88 | return &tester{ 89 | T: t, 90 | Checker: checker, 91 | Controller: cont, 92 | producedInst: produced, 93 | outcomeInst: outcome, 94 | aliveServer: aliveServer, 95 | } 96 | } 97 | 98 | func (t *tester) Collect() { 99 | require.NoError(t.T, t.Controller.Collect(context.Background())) 100 | } 101 | 102 | func (t *tester) getHealth() (int, Response) { 103 | require.NoError(t.T, t.Controller.Collect(context.Background())) 104 | 105 | url := t.aliveServer.URL 106 | 107 | resp, err := http.Get(url) 108 | require.NoError(t.T, err) 109 | 110 | var res Response 111 | require.NoError(t.T, json.NewDecoder(resp.Body).Decode(&res)) 112 | 113 | require.Equal(t.T, resp.StatusCode, res.Code) 114 | 115 | return resp.StatusCode, res 116 | } 117 | 118 | func TestProducedProgress(t *testing.T) { 119 | // Try health check failures after 1, 2, and 3 healthy periods. 120 | for k := 1; k <= 3; k++ { 121 | ctx := context.Background() 122 | tester := testController(t) 123 | tester.SetRunning() 124 | 125 | // For the number of healthy periods, add one at a time 126 | // and check for health. 
127 | for j := 0; j < k; j++ { 128 | tester.producedInst.Add(ctx, 1) 129 | tester.outcomeInst.Add(ctx, 1, attribute.String("outcome", "success")) 130 | 131 | for i := 0; i < numSamples-1; i++ { 132 | code, result := tester.getHealth() 133 | 134 | require.Equal(t, http.StatusOK, code, "i/j %d/%d", i, j) 135 | require.Equal(t, "healthy", result.Status) 136 | } 137 | } 138 | 139 | code, result := tester.getHealth() 140 | 141 | require.Equal(t, http.StatusServiceUnavailable, code) 142 | require.Contains(t, result.Status, 143 | fmt.Sprintf("unhealthy: %s stopped moving at %d", 144 | config.ProducedPointsMetric, 145 | k, 146 | ), 147 | ) 148 | } 149 | } 150 | 151 | func TestOutcomesProgress(t *testing.T) { 152 | ctx := context.Background() 153 | tester := testController(t) 154 | tester.SetRunning() 155 | 156 | for j := 0; j < numSamples; j++ { 157 | tester.outcomeInst.Add(ctx, 10, attribute.String("outcome", "success")) 158 | tester.producedInst.Add(ctx, 1) 159 | 160 | code, result := tester.getHealth() 161 | 162 | require.Equal(t, http.StatusOK, code) 163 | require.Equal(t, "healthy", result.Status) 164 | } 165 | 166 | for j := 0; j < numSamples/2; j++ { 167 | tester.outcomeInst.Add(ctx, 10, attribute.String("outcome", "failed")) 168 | tester.producedInst.Add(ctx, 1) 169 | 170 | code, result := tester.getHealth() 171 | 172 | require.Equal(t, http.StatusOK, code, "J %d", j) 173 | require.Equal(t, "healthy", result.Status) 174 | } 175 | 176 | code, result := tester.getHealth() 177 | 178 | require.Equal(t, http.StatusServiceUnavailable, code) 179 | require.Contains(t, result.Status, 180 | fmt.Sprintf("unhealthy: %s high error ratio", 181 | config.OutcomeMetric, 182 | ), 183 | ) 184 | } 185 | 186 | func TestOutcomesProgressCustomRatio(t *testing.T) { 187 | ctx := context.Background() 188 | tester := testController(t) 189 | tester.Checker.thresholdRatio = 0.3 190 | tester.SetRunning() 191 | 192 | for j := 0; j < numSamples; j++ { 193 | tester.outcomeInst.Add(ctx, 10, 
attribute.String("outcome", "success")) 194 | tester.producedInst.Add(ctx, 1) 195 | 196 | code, result := tester.getHealth() 197 | 198 | require.Equal(t, http.StatusOK, code) 199 | require.Equal(t, "healthy", result.Status) 200 | } 201 | 202 | for j := 0; j < numSamples/2; j++ { 203 | tester.outcomeInst.Add(ctx, 10, attribute.String("outcome", "failed")) 204 | tester.producedInst.Add(ctx, 1) 205 | 206 | code, result := tester.getHealth() 207 | 208 | require.Equal(t, http.StatusOK, code, "J %d", j) 209 | require.Equal(t, "healthy", result.Status) 210 | } 211 | 212 | code, result := tester.getHealth() 213 | 214 | require.Equal(t, http.StatusOK, code) 215 | require.Equal(t, "healthy", result.Status) 216 | } 217 | 218 | func TestOutcomes4951(t *testing.T) { 219 | ctx := context.Background() 220 | tester := testController(t) 221 | tester.SetRunning() 222 | 223 | for j := 0; j < 100; j++ { 224 | tester.outcomeInst.Add(ctx, 51, attribute.String("outcome", "success")) 225 | tester.outcomeInst.Add(ctx, 49, attribute.String("outcome", fmt.Sprint(rand.Intn(10)))) 226 | tester.producedInst.Add(ctx, 100) 227 | 228 | code, result := tester.getHealth() 229 | 230 | require.Equal(t, http.StatusOK, code) 231 | require.Equal(t, "healthy", result.Status) 232 | } 233 | } 234 | 235 | func TestOutcomesNoSuccess(t *testing.T) { 236 | ctx := context.Background() 237 | tester := testController(t) 238 | tester.SetRunning() 239 | 240 | for j := 0; j < numSamples-1; j++ { 241 | tester.outcomeInst.Add(ctx, 10, attribute.String("outcome", "failed")) 242 | tester.producedInst.Add(ctx, 1) 243 | 244 | code, result := tester.getHealth() 245 | 246 | require.Equal(t, http.StatusOK, code) 247 | require.Equal(t, "healthy", result.Status) 248 | } 249 | 250 | code, result := tester.getHealth() 251 | 252 | require.Equal(t, http.StatusServiceUnavailable, code) 253 | require.Contains(t, result.Status, 254 | fmt.Sprintf("unhealthy: %s{%s} stopped moving at %d", 255 | config.OutcomeMetric, 256 | 
outcomeGoodAttribute, 257 | 0, 258 | ), 259 | ) 260 | } 261 | 262 | func TestSuperStackdump(t *testing.T) { 263 | tester := testController(t) 264 | tester.SetRunning() 265 | 266 | for i := 0; i < numSamples-1; i++ { 267 | code, result := tester.getHealth() 268 | 269 | require.Equal(t, http.StatusOK, code) 270 | require.Equal(t, "healthy", result.Status) 271 | require.Equal(t, "", result.Stackdump) 272 | } 273 | 274 | code, result := tester.getHealth() 275 | 276 | require.Equal(t, http.StatusServiceUnavailable, code) 277 | require.Contains(t, result.Stackdump, "goroutine") 278 | oldStack := result.Stackdump 279 | 280 | code, result = tester.getHealth() 281 | 282 | require.Equal(t, http.StatusServiceUnavailable, code) 283 | require.NotEqual(t, oldStack, result.Stackdump) 284 | } 285 | -------------------------------------------------------------------------------- /internal/promtest/fake.go: -------------------------------------------------------------------------------- 1 | package promtest 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "math/rand" 8 | "net/http" 9 | "net/http/httptest" 10 | "net/url" 11 | "sync" 12 | "time" 13 | 14 | "github.com/lightstep/opentelemetry-prometheus-sidecar/common" 15 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 16 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 17 | ) 18 | 19 | // MetadataMap implements a MetadataGetter for exact matches of "job/instance/metric" inputs. 
20 | type MetadataMap map[string]*config.MetadataEntry 21 | 22 | func (m MetadataMap) Get(ctx context.Context, job, instance, metric string) (*config.MetadataEntry, error) { 23 | return m[job+"/"+instance+"/"+metric], nil 24 | } 25 | 26 | type Config struct { 27 | Version string 28 | Metadata MetadataMap 29 | } 30 | 31 | type FakePrometheus struct { 32 | lock sync.Mutex 33 | ready bool 34 | segment int 35 | intervals []time.Duration 36 | config string 37 | mux *http.ServeMux 38 | } 39 | 40 | func NewFakePrometheus(cfg Config) *FakePrometheus { 41 | if cfg.Version == "" { 42 | cfg.Version = config.PrometheusMinVersion 43 | } 44 | 45 | const segmentName = config.PrometheusCurrentSegmentMetricName 46 | const scrapeIntervalName = config.PrometheusTargetIntervalLengthName 47 | const scrapeIntervalSum = scrapeIntervalName + "_sum" 48 | const scrapeIntervalCount = scrapeIntervalName + "_count" 49 | const promBuildInfo = config.PrometheusBuildInfoName 50 | 51 | fp := &FakePrometheus{ 52 | ready: true, 53 | segment: 0, 54 | intervals: []time.Duration{30 * time.Second}, 55 | mux: http.NewServeMux(), 56 | } 57 | 58 | fp.mux.HandleFunc("/-/ready", func(w http.ResponseWriter, r *http.Request) { 59 | fp.lock.Lock() 60 | defer fp.lock.Unlock() 61 | if fp.ready { 62 | w.WriteHeader(http.StatusOK) 63 | } else { 64 | w.WriteHeader(http.StatusServiceUnavailable) 65 | } 66 | }) 67 | fp.mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { 68 | fp.lock.Lock() 69 | defer fp.lock.Unlock() 70 | 71 | _, err := w.Write([]byte(fmt.Sprintf(` 72 | # HELP %s A metric with a constant '1' value labeled by version, revision, branch, and goversion from which prometheus was built. 
73 | # TYPE %s gauge 74 | %s{branch="HEAD",goversion="go1.11.1",revision="167a4b4e73a8eca8df648d2d2043e21bdb9a7449",version="%s"} 1 75 | `, promBuildInfo, promBuildInfo, promBuildInfo, cfg.Version))) 76 | if err != nil { 77 | panic(err) 78 | } 79 | 80 | _, err = w.Write([]byte(fmt.Sprintf(` 81 | # HELP %s Current segment. 82 | # TYPE %s gauge 83 | %s{} %d 84 | `, segmentName, segmentName, segmentName, fp.segment))) 85 | if err != nil { 86 | panic(err) 87 | } 88 | 89 | _, err = w.Write([]byte(fmt.Sprintf(` 90 | # HELP %s Scrape interval summary. 91 | # TYPE %s summary 92 | `, scrapeIntervalName, scrapeIntervalName))) 93 | if err != nil { 94 | panic(err) 95 | } 96 | 97 | for _, in := range fp.intervals { 98 | cnt := 1 + rand.Intn(3) 99 | p99 := in.Seconds() + 0.000123 100 | sum := float64(cnt) * p99 101 | _, err = w.Write([]byte(fmt.Sprintf(` 102 | %s{interval="%s",quantile="0.99"} %f 103 | %s{interval="%s"} %f 104 | %s{interval="%s"} %d 105 | `, scrapeIntervalName, in, p99, scrapeIntervalSum, in, sum, scrapeIntervalCount, in, cnt))) 106 | if err != nil { 107 | panic(err) 108 | } 109 | } 110 | }) 111 | 112 | // Serve instrument metadata 113 | fp.mux.HandleFunc("/"+config.PrometheusTargetMetadataEndpointPath, 114 | func(w http.ResponseWriter, r *http.Request) { 115 | var metaResp common.TargetMetadataAPIResponse 116 | for _, entry := range cfg.Metadata { 117 | // Note: This endpoint is used to request metadata 118 | // for a specific target. It does not use the target 119 | // details and returns constant metadata for testing 120 | // purposes. 
121 | metaResp.Data = append(metaResp.Data, common.APITargetMetadata{ 122 | Metric: entry.Metric, 123 | Help: "helpful", 124 | Type: entry.MetricType, 125 | }) 126 | } 127 | metaRespData, err := json.Marshal(metaResp) 128 | if err != nil { 129 | panic(err) 130 | } 131 | 132 | _, _ = w.Write(metaRespData) 133 | }, 134 | ) 135 | 136 | // Serve the server's configuration 137 | fp.mux.HandleFunc("/"+config.PrometheusConfigEndpointPath, 138 | func(w http.ResponseWriter, r *http.Request) { 139 | fp.lock.Lock() 140 | defer fp.lock.Unlock() 141 | 142 | var cfg common.ConfigAPIResponse 143 | cfg.Status = "ok" 144 | cfg.Data.YAML = fp.config 145 | 146 | data, err := json.Marshal(&cfg) 147 | if err != nil { 148 | http.Error(w, err.Error(), http.StatusInternalServerError) 149 | return 150 | } 151 | _, _ = w.Write(data) 152 | }, 153 | ) 154 | return fp 155 | } 156 | 157 | func (fp *FakePrometheus) Test() *url.URL { 158 | server := httptest.NewServer(fp.mux) 159 | 160 | fpu, err := url.Parse(server.URL) 161 | if err != nil { 162 | panic(err) 163 | } 164 | 165 | return fpu 166 | } 167 | 168 | func (fp *FakePrometheus) ReadyConfig() config.PromReady { 169 | return config.PromReady{ 170 | Logger: telemetry.DefaultLogger(), 171 | PromURL: fp.Test(), 172 | HealthCheckRequestTimeout: config.DefaultHealthCheckTimeout, 173 | } 174 | } 175 | 176 | func (fp *FakePrometheus) SetSegment(s int) { 177 | fp.lock.Lock() 178 | defer fp.lock.Unlock() 179 | 180 | fp.segment = s 181 | } 182 | 183 | func (fp *FakePrometheus) SetReady(r bool) { 184 | fp.lock.Lock() 185 | defer fp.lock.Unlock() 186 | 187 | fp.ready = r 188 | } 189 | 190 | func (fp *FakePrometheus) SetIntervals(is ...time.Duration) { 191 | fp.lock.Lock() 192 | defer fp.lock.Unlock() 193 | 194 | fp.intervals = is 195 | } 196 | 197 | func (fp *FakePrometheus) SetPromConfigYaml(config string) { 198 | fp.lock.Lock() 199 | defer fp.lock.Unlock() 200 | 201 | fp.config = config 202 | } 203 | 204 | func (fp *FakePrometheus) ServeMux() 
*http.ServeMux { 205 | return fp.mux 206 | } 207 | -------------------------------------------------------------------------------- /leader/leader.go: -------------------------------------------------------------------------------- 1 | package leader 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | sidecar "github.com/lightstep/opentelemetry-prometheus-sidecar" 9 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 10 | "go.opentelemetry.io/otel/metric" 11 | 12 | "github.com/go-kit/kit/log" 13 | "github.com/go-kit/kit/log/level" 14 | "github.com/pkg/errors" 15 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 | "k8s.io/client-go/kubernetes" 17 | "k8s.io/client-go/rest" 18 | "k8s.io/client-go/tools/leaderelection" 19 | "k8s.io/client-go/tools/leaderelection/resourcelock" 20 | ) 21 | 22 | type Candidate interface { 23 | Start(ctx context.Context) error 24 | IsLeader() bool 25 | } 26 | 27 | type Controller interface { 28 | OnNewLeader(self bool, identity string) 29 | OnStartedLeading(ctx context.Context) 30 | OnStoppedLeading() 31 | } 32 | 33 | type candidate struct { 34 | client kubernetes.Interface 35 | ctrl Controller 36 | id string 37 | elector *leaderelection.LeaderElector 38 | logger log.Logger 39 | leaderMetric metric.Int64UpDownCounterObserver 40 | } 41 | 42 | type LoggingController struct { 43 | log.Logger 44 | } 45 | 46 | func NewAlwaysLeaderCandidate() Candidate { 47 | return alwaysLeader{} 48 | } 49 | 50 | type alwaysLeader struct{} 51 | 52 | func (a alwaysLeader) Start(_ context.Context) error { 53 | return nil 54 | } 55 | 56 | func (a alwaysLeader) IsLeader() bool { 57 | return true 58 | } 59 | 60 | func NewClient() (*kubernetes.Clientset, error) { 61 | cfg, err := rest.InClusterConfig() 62 | if err != nil { 63 | return nil, errors.Wrap(err, "in-cluster k8s config") 64 | } 65 | client, err := kubernetes.NewForConfig(cfg) 66 | if err != nil { 67 | return nil, errors.Wrap(err, "new k8s client") 68 | } 69 | return client, err 70 | } 
71 | 72 | func NewKubernetesCandidate(client kubernetes.Interface, namespace, name, id string, ctrl Controller, logger log.Logger) (Candidate, error) { 73 | c := &candidate{ 74 | client: client, 75 | ctrl: ctrl, 76 | id: id, 77 | logger: logger, 78 | } 79 | 80 | c.leaderMetric = sidecar.OTelMeterMust.NewInt64UpDownCounterObserver( 81 | config.LeadershipMetric, 82 | func(ctx context.Context, result metric.Int64ObserverResult) { 83 | if c.IsLeader() { 84 | result.Observe(1) 85 | } else { 86 | result.Observe(0) 87 | } 88 | }, 89 | metric.WithDescription("Leadership status of this sidecar"), 90 | ) 91 | 92 | lock := &resourcelock.LeaseLock{ 93 | LeaseMeta: metav1.ObjectMeta{ 94 | Name: name, 95 | Namespace: namespace, 96 | }, 97 | Client: c.client.CoordinationV1(), 98 | LockConfig: resourcelock.ResourceLockConfig{ 99 | Identity: id, 100 | }, 101 | } 102 | 103 | lec := leaderelection.LeaderElectionConfig{ 104 | Lock: lock, 105 | Name: fmt.Sprint(namespace, "-", name, ":", id), 106 | ReleaseOnCancel: true, 107 | LeaseDuration: 60 * time.Second, 108 | RenewDeadline: 15 * time.Second, 109 | RetryPeriod: 5 * time.Second, 110 | Callbacks: leaderelection.LeaderCallbacks{ 111 | OnStartedLeading: func(ctx context.Context) { 112 | ctrl.OnStartedLeading(ctx) 113 | }, 114 | OnStoppedLeading: func() { 115 | ctrl.OnStoppedLeading() 116 | }, 117 | OnNewLeader: func(id string) { 118 | ctrl.OnNewLeader(lock.LockConfig.Identity == id, id) 119 | }, 120 | }, 121 | } 122 | 123 | elector, err := leaderelection.NewLeaderElector(lec) 124 | if err != nil { 125 | return nil, errors.Wrap(err, "start elector") 126 | } 127 | c.elector = elector 128 | return c, nil 129 | 130 | } 131 | 132 | func (c *candidate) Start(ctx context.Context) error { 133 | // This runs until the context is canceled by main(). 
134 | go c.elector.Run(ctx) 135 | 136 | return nil 137 | } 138 | 139 | var _ Candidate = (*alwaysLeader)(nil) 140 | 141 | func (c *candidate) IsLeader() bool { 142 | return c.elector.IsLeader() 143 | } 144 | 145 | func (c LoggingController) OnStartedLeading(ctx context.Context) { 146 | level.Info(c.Logger).Log("msg", "this sidecar started leading") 147 | } 148 | 149 | func (c LoggingController) OnStoppedLeading() { 150 | level.Info(c.Logger).Log("msg", "this sidecar stopped leading") 151 | } 152 | 153 | func (c LoggingController) OnNewLeader(self bool, identity string) { 154 | if self { 155 | level.Info(c.Logger).Log("msg", "this sidecar has become leader", "id", identity) 156 | return 157 | } 158 | level.Info(c.Logger).Log("msg", "another sidecar became leader is", "id", identity) 159 | } 160 | -------------------------------------------------------------------------------- /leader/leader_test.go: -------------------------------------------------------------------------------- 1 | package leader 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "testing" 7 | 8 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 9 | "github.com/stretchr/testify/require" 10 | "k8s.io/client-go/kubernetes/fake" 11 | ) 12 | 13 | type testController struct { 14 | lock sync.Mutex 15 | cond *sync.Cond 16 | started bool 17 | newLeader bool 18 | } 19 | 20 | func newTest() *testController { 21 | tc := &testController{} 22 | tc.cond = sync.NewCond(&tc.lock) 23 | return tc 24 | } 25 | 26 | func TestLeaderElection(t *testing.T) { 27 | fc := fake.NewSimpleClientset() 28 | tc := newTest() 29 | le, err := NewKubernetesCandidate(fc, "default", "hello", "world", tc, telemetry.DefaultLogger()) 30 | require.NoError(t, err) 31 | 32 | ctx, cancel := context.WithCancel(context.Background()) 33 | defer cancel() 34 | 35 | require.NoError(t, le.Start(ctx)) 36 | 37 | tc.lock.Lock() 38 | for !tc.started || !tc.newLeader { 39 | tc.cond.Wait() 40 | } 41 | 42 | tc.lock.Unlock() 43 | } 44 | 45 | func 
(c *testController) OnStartedLeading(ctx context.Context) { 46 | c.lock.Lock() 47 | defer c.lock.Unlock() 48 | c.started = true 49 | c.cond.Broadcast() 50 | } 51 | 52 | func (c *testController) OnStoppedLeading() { 53 | c.lock.Lock() 54 | defer c.lock.Unlock() 55 | c.started = false 56 | c.cond.Broadcast() 57 | } 58 | 59 | func (c *testController) OnNewLeader(self bool, identity string) { 60 | c.lock.Lock() 61 | defer c.lock.Unlock() 62 | c.newLeader = self 63 | c.cond.Broadcast() 64 | } 65 | -------------------------------------------------------------------------------- /metadata/metadata_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Google Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package metadata 18 | 19 | import ( 20 | "context" 21 | "encoding/json" 22 | "net/http" 23 | "net/http/httptest" 24 | "net/url" 25 | "reflect" 26 | "testing" 27 | 28 | "github.com/lightstep/opentelemetry-prometheus-sidecar/common" 29 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 30 | "github.com/prometheus/prometheus/pkg/textparse" 31 | ) 32 | 33 | type targetMetadataResponse common.TargetMetadataAPIResponse 34 | type metadataResponse common.MetadataAPIResponse 35 | 36 | func TestCache_Get(t *testing.T) { 37 | metrics := []common.APITargetMetadata{ 38 | {Metric: "metric1", Type: textparse.MetricTypeCounter, Help: "help_metric1"}, 39 | {Metric: "metric2", Type: textparse.MetricTypeGauge, Help: "help_metric2"}, 40 | {Metric: "metric3", Type: textparse.MetricTypeHistogram, Help: "help_metric3"}, 41 | {Metric: "metric4", Type: textparse.MetricTypeSummary, Help: "help_metric4"}, 42 | {Metric: "metric5", Type: textparse.MetricTypeUnknown, Help: "help_metric5"}, 43 | {Metric: "metric6", Type: config.MetricTypeUntyped, Help: "help_metric6"}, 44 | {Metric: "metric_with_override", Type: textparse.MetricTypeGauge, Help: "help_metric_with_override"}, 45 | } 46 | // Batch metadata is served by /targets/metadata whereas 47 | // single metadata is served by /metatada/ 48 | var tMetadataHandler func(qMetric, qMatch string) *targetMetadataResponse 49 | var metadataHandler func(qMetric string) *metadataResponse 50 | 51 | mux := http.NewServeMux() 52 | mux.HandleFunc("/targets/metadata/", func(w http.ResponseWriter, r *http.Request) { 53 | err := json.NewEncoder(w).Encode(tMetadataHandler( 54 | r.FormValue("metric"), 55 | r.FormValue("match_target"), 56 | )) 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | }) 61 | mux.HandleFunc("/metadata/", func(w http.ResponseWriter, r *http.Request) { 62 | err := json.NewEncoder(w).Encode(metadataHandler( 63 | r.FormValue("metric"), 64 | )) 65 | if err != nil { 66 | t.Fatal(err) 67 | } 68 | }) 69 | 70 | ts 
:= httptest.NewServer(mux) 71 | expect := func(want common.APITargetMetadata, got *config.MetadataEntry) { 72 | if !reflect.DeepEqual(want, common.APITargetMetadata{ 73 | Metric: got.Metric, 74 | Type: got.MetricType, 75 | Help: got.Help, 76 | }) { 77 | t.Errorf("unexpected result %v, want %v", got, want) 78 | } 79 | } 80 | ctx, cancel := context.WithCancel(context.Background()) 81 | defer cancel() 82 | 83 | targetMetadataURL, err := url.Parse(ts.URL + "/targets/metadata/") 84 | if err != nil { 85 | t.Fatal(err) 86 | } 87 | metadataURL, err := url.Parse(ts.URL + "/metadata/") 88 | if err != nil { 89 | t.Fatal(err) 90 | } 91 | // Create cache with static metadata. 92 | staticMetadata := []*config.MetadataEntry{ 93 | &config.MetadataEntry{Metric: "static_metric1", MetricType: textparse.MetricTypeCounter, ValueType: config.INT64, Help: "help_static1"}, 94 | &config.MetadataEntry{Metric: "static_metric2", MetricType: textparse.MetricTypeCounter, ValueType: config.DOUBLE, Help: "help_static2"}, 95 | &config.MetadataEntry{Metric: "metric_with_override", MetricType: textparse.MetricTypeCounter, ValueType: config.INT64, Help: "help_metric_override"}, 96 | } 97 | c := NewCache(nil, targetMetadataURL, metadataURL, staticMetadata) 98 | 99 | // First get for the job, we expect an initial batch request. 100 | tMetadataHandler = func(qMetric, qMatch string) *targetMetadataResponse { 101 | if qMetric != "" { 102 | t.Fatalf("unexpected metric %v in request", qMetric) 103 | } 104 | if qMatch != `{job="prometheus",instance="localhost:9090"}` { 105 | t.Fatalf("unexpected matcher %v in request", qMatch) 106 | } 107 | return &targetMetadataResponse{Status: "success", Data: metrics[:4]} 108 | } 109 | md, err := c.Get(ctx, "prometheus", "localhost:9090", "metric2") 110 | if err != nil { 111 | t.Fatal(err) 112 | } 113 | expect(metrics[1], md) 114 | 115 | // Query metric that should have been retrieved in the initial batch. 
116 | tMetadataHandler = func(qMetric, qMatch string) *targetMetadataResponse { 117 | t.Fatal("unexpected request") 118 | return nil 119 | } 120 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "metric1") 121 | if err != nil { 122 | t.Fatal(err) 123 | } 124 | expect(metrics[0], md) 125 | // Similarly, changing the instance should not trigger a fetch with a known metric and job. 126 | md, err = c.Get(ctx, "prometheus", "localhost:8000", "metric3") 127 | if err != nil { 128 | t.Fatal(err) 129 | } 130 | expect(metrics[2], md) 131 | 132 | // Query metric that was not in the batch, expect a single-metric query. 133 | metadataHandler = func(qMetric string) *metadataResponse { 134 | if qMetric != "metric5" { 135 | t.Fatalf("unexpected metric %v in request", qMetric) 136 | } 137 | return &metadataResponse{Status: "success", Data: map[string][]common.APIMetadata{ 138 | "metric5": []common.APIMetadata{common.APIMetadata{ 139 | Type: metrics[4].Type, Help: metrics[4].Help, 140 | }}, 141 | }} 142 | } 143 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "metric5") 144 | if err != nil { 145 | t.Fatal(err) 146 | } 147 | expect(metrics[4], md) 148 | // It should be in our cache afterwards. 149 | metadataHandler = func(qMetric string) *metadataResponse { 150 | t.Fatal("unexpected request") 151 | return nil 152 | } 153 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "metric5") 154 | if err != nil { 155 | t.Fatal(err) 156 | } 157 | expect(metrics[4], md) 158 | 159 | // Test "untyped" metric type from Prometheus 2.4. 
160 | metadataHandler = func(qMetric string) *metadataResponse { 161 | if qMetric != "metric6" { 162 | t.Fatalf("unexpected metric %v in request", qMetric) 163 | } 164 | return &metadataResponse{Status: "success", Data: map[string][]common.APIMetadata{ 165 | "metric6": []common.APIMetadata{common.APIMetadata{ 166 | Type: metrics[5].Type, Help: metrics[5].Help, 167 | }}, 168 | }} 169 | } 170 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "metric6") 171 | if err != nil { 172 | t.Fatal(err) 173 | } 174 | expect(common.APITargetMetadata{Metric: "metric6", Type: textparse.MetricTypeUnknown, Help: "help_metric6"}, md) 175 | 176 | // The scrape layer's metrics should not fire off requests. 177 | for _, internalName := range []string{"up", "scrape_series_added"} { 178 | md, err = c.Get(ctx, "prometheus", "localhost:9090", internalName) 179 | if err != nil { 180 | t.Fatal(err) 181 | } 182 | if !reflect.DeepEqual(internalMetrics[internalName], md) { 183 | t.Errorf("unexpected metadata %v, want %v", *md, internalMetrics[internalName]) 184 | } 185 | md, err = c.Get(ctx, "prometheus", "localhost:9090", internalName) 186 | } 187 | 188 | // If a metric does not exist, we first expect a fetch (single metric) attempt. 189 | metadataHandler = func(qMetric string) *metadataResponse { 190 | if qMetric != "does_not_exist" { 191 | t.Fatalf("unexpected metric %v in request", qMetric) 192 | } 193 | return &metadataResponse{Status: "error", ErrorType: apiErrorNotFound, Error: "does not exist"} 194 | } 195 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "does_not_exist") 196 | if err != nil { 197 | t.Fatal(err) 198 | } 199 | if md != nil { 200 | t.Fatalf("expected nil metadata but got %v", md) 201 | } 202 | // Requesting it again should not do another (single metric) request (modulo timeout). 
203 | metadataHandler = func(qMetric string) *metadataResponse { 204 | t.Fatal("unexpected request") 205 | return nil 206 | } 207 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "does_not_exist") 208 | if err != nil { 209 | t.Fatal(err) 210 | } 211 | if md != nil { 212 | t.Fatalf("expected nil metadata but got %v", md) 213 | } 214 | 215 | // Test matcher escaping. 216 | tMetadataHandler = func(qMetric, qMatch string) *targetMetadataResponse { 217 | if qMatch != `{job="prometheus\nwith_newline",instance="localhost:9090"}` { 218 | t.Fatalf("matcher not escaped properly: %s", qMatch) 219 | } 220 | return nil 221 | } 222 | _, err = c.Get(ctx, "prometheus\nwith_newline", "localhost:9090", "metric") 223 | if err != nil { 224 | t.Fatal(err) 225 | } 226 | 227 | // Test fallthrough to static metadata. 228 | tMetadataHandler = func(qMetric, qMatch string) *targetMetadataResponse { 229 | return nil 230 | } 231 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "static_metric2") 232 | if err != nil { 233 | t.Fatal(err) 234 | } 235 | want := staticMetadata[1] 236 | if !reflect.DeepEqual(md, want) { 237 | t.Errorf("expected metadata %v but got %v", want, md) 238 | } 239 | 240 | // Test override with static metadata. 241 | tMetadataHandler = func(qMetric, qMatch string) *targetMetadataResponse { 242 | return &targetMetadataResponse{Status: "success", Data: metrics} 243 | } 244 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "metric_with_override") 245 | if err != nil { 246 | t.Fatal(err) 247 | } 248 | want = staticMetadata[2] 249 | if !reflect.DeepEqual(md, want) { 250 | t.Errorf("expected metadata %v but got %v", want, md) 251 | } 252 | 253 | // Test recording rule (uses single metric fetching). 
254 | metadataHandler = func(qMetric string) *metadataResponse { 255 | return nil 256 | } 257 | md, err = c.Get(ctx, "prometheus", "localhost:9090", "some:recording:rule") 258 | if err != nil { 259 | t.Fatal(err) 260 | } 261 | want = &config.MetadataEntry{ 262 | Metric: "some:recording:rule", 263 | MetricType: textparse.MetricTypeGauge, 264 | } 265 | if !reflect.DeepEqual(md, want) { 266 | t.Errorf("expected metadata %v but got %v", want, md) 267 | } 268 | } 269 | 270 | func TestNewCache(t *testing.T) { 271 | static := []*config.MetadataEntry{ 272 | &config.MetadataEntry{Metric: "a", Help: "a"}, 273 | &config.MetadataEntry{Metric: "b", Help: "b"}, 274 | } 275 | c := NewCache(nil, nil, nil, static) 276 | 277 | want := map[string]*config.MetadataEntry{ 278 | "a": &config.MetadataEntry{Metric: "a", Help: "a"}, 279 | "b": &config.MetadataEntry{Metric: "b", Help: "b"}, 280 | } 281 | if !reflect.DeepEqual(c.staticMetadata, want) { 282 | t.Errorf("expected metadata %v but got %v", want, c.staticMetadata) 283 | } 284 | } 285 | -------------------------------------------------------------------------------- /otlp/client.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package otlp 15 | 16 | import ( 17 | "context" 18 | "crypto/tls" 19 | "crypto/x509" 20 | "fmt" 21 | "io/ioutil" 22 | "net" 23 | "net/url" 24 | "strconv" 25 | "strings" 26 | "time" 27 | 28 | "github.com/go-kit/kit/log" 29 | "github.com/go-kit/kit/log/level" 30 | "github.com/lightstep/opentelemetry-prometheus-sidecar/common" 31 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 32 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 33 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry/doevery" 34 | metricsService "go.opentelemetry.io/proto/otlp/collector/metrics/v1" 35 | "google.golang.org/grpc" 36 | "google.golang.org/grpc/credentials" 37 | grpcMetadata "google.golang.org/grpc/metadata" 38 | "google.golang.org/protobuf/proto" 39 | ) 40 | 41 | const ( 42 | // serviceConfig copied from OTel-Go. 43 | // https://github.com/open-telemetry/opentelemetry-go/blob/5ed96e92446d2d58d131e0672da613a84c16af7a/exporters/otlp/grpcoptions.go#L37 44 | serviceConfig = `{ 45 | "methodConfig":[{ 46 | "name":[ 47 | { "service":"opentelemetry.proto.collector.metrics.v1.MetricsService" }, 48 | { "service":"opentelemetry.proto.collector.trace.v1.TraceService" } 49 | ], 50 | "retryPolicy":{ 51 | "MaxAttempts":5, 52 | "InitialBackoff":"0.3s", 53 | "MaxBackoff":"5s", 54 | "BackoffMultiplier":2, 55 | "RetryableStatusCodes":[ 56 | "UNAVAILABLE", 57 | "CANCELLED", 58 | "DEADLINE_EXCEEDED", 59 | "RESOURCE_EXHAUSTED", 60 | "ABORTED", 61 | "OUT_OF_RANGE", 62 | "UNAVAILABLE", 63 | "DATA_LOSS" 64 | ] 65 | } 66 | }] 67 | }` 68 | 69 | invalidTrailerPrefix = "otlp-invalid-" 70 | ) 71 | 72 | var ( 73 | exportDuration = telemetry.NewTimer( 74 | "sidecar.export.duration", 75 | "duration of the otlp.Export() call", 76 | ) 77 | 78 | connectDuration = telemetry.NewTimer( 79 | "sidecar.connect.duration", 80 | "duration of the grpc.Dial() call", 81 | ) 82 | 83 | errNoSingleCount = fmt.Errorf("no single count") 84 | ) 85 | 86 | // Client allows reading and 
writing from/to a remote gRPC endpoint. The 87 | // implementation may hit a single backend, so the application should create a 88 | // number of these clients. 89 | type Client struct { 90 | logger log.Logger 91 | url *url.URL 92 | timeout time.Duration 93 | rootCertificates []string 94 | headers grpcMetadata.MD 95 | compressor string 96 | prometheus config.PromConfig 97 | invalidSet common.FailingReporter 98 | 99 | conn *grpc.ClientConn 100 | } 101 | 102 | // ClientConfig configures a Client. 103 | type ClientConfig struct { 104 | Logger log.Logger 105 | URL *url.URL 106 | Timeout time.Duration 107 | RootCertificates []string 108 | Headers grpcMetadata.MD 109 | Compressor string 110 | Prometheus config.PromConfig 111 | FailingReporter common.FailingReporter 112 | } 113 | 114 | // NewClient creates a new Client. 115 | func NewClient(conf ClientConfig) *Client { 116 | logger := conf.Logger 117 | if logger == nil { 118 | logger = log.NewNopLogger() 119 | } 120 | return &Client{ 121 | logger: logger, 122 | url: conf.URL, 123 | timeout: conf.Timeout, 124 | rootCertificates: conf.RootCertificates, 125 | headers: conf.Headers, 126 | compressor: conf.Compressor, 127 | prometheus: conf.Prometheus, 128 | invalidSet: conf.FailingReporter, 129 | } 130 | } 131 | 132 | // getConnection will dial a new connection if one is not set. When 133 | // dialing, this function uses its a new context and the same timeout 134 | // used for Store(). 
135 | func (c *Client) getConnection(ctx context.Context) (_ *grpc.ClientConn, retErr error) { 136 | if c.conn != nil { 137 | return c.conn, nil 138 | } 139 | 140 | defer connectDuration.Start(ctx).Stop(&retErr) 141 | 142 | ctx, cancel := context.WithTimeout(ctx, c.timeout) 143 | defer cancel() 144 | 145 | useAuth := c.url.Scheme != "http" 146 | level.Debug(c.logger).Log( 147 | "msg", "new OTLP connection", 148 | "auth", useAuth, 149 | "url", c.url.String(), 150 | "timeout", c.timeout) 151 | 152 | dopts := []grpc.DialOption{ 153 | grpc.WithBlock(), // Wait for the connection to be established before using it. 154 | grpc.WithDefaultServiceConfig(serviceConfig), 155 | 156 | // Note: The Sidecar->OTel gRPC connection is not traced: 157 | // grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor()), 158 | } 159 | if useAuth { 160 | var tcfg tls.Config 161 | if len(c.rootCertificates) != 0 { 162 | certPool := x509.NewCertPool() 163 | 164 | for _, cert := range c.rootCertificates { 165 | bs, err := ioutil.ReadFile(cert) 166 | if err != nil { 167 | return nil, fmt.Errorf("could not read certificate authority certificate: %s: %w", cert, err) 168 | } 169 | 170 | ok := certPool.AppendCertsFromPEM(bs) 171 | if !ok { 172 | return nil, fmt.Errorf("could not parse certificate authority certificate: %s: %w", cert, err) 173 | } 174 | } 175 | 176 | tcfg = tls.Config{ 177 | ServerName: c.url.Hostname(), 178 | RootCAs: certPool, 179 | } 180 | } 181 | level.Debug(c.logger).Log( 182 | "msg", "TLS configured", 183 | "server", c.url.Hostname(), 184 | "root_certs", fmt.Sprint(c.rootCertificates), 185 | ) 186 | dopts = append(dopts, grpc.WithTransportCredentials(credentials.NewTLS(&tcfg))) 187 | } else { 188 | dopts = append(dopts, grpc.WithInsecure()) 189 | } 190 | if c.compressor != "" && c.compressor != "none" { 191 | dopts = append(dopts, grpc.WithDefaultCallOptions(grpc.UseCompressor(c.compressor))) 192 | } 193 | address := c.url.Hostname() 194 | if len(c.url.Port()) > 0 { 195 | 
address = net.JoinHostPort(address, c.url.Port()) 196 | } 197 | conn, err := grpc.DialContext(ctx, address, dopts...) 198 | c.conn = conn 199 | if err != nil { 200 | level.Debug(c.logger).Log( 201 | "msg", "connection status", 202 | "address", address, 203 | "err", err, 204 | ) 205 | return nil, err 206 | } 207 | 208 | return conn, err 209 | } 210 | 211 | // Selftest sends an empty request the endpoint. 212 | func (c *Client) Selftest(ctx context.Context) error { 213 | // Loop until the context is canceled, allowing for retryable failures. 214 | for { 215 | conn, err := c.getConnection(ctx) 216 | 217 | if err == nil { 218 | service := metricsService.NewMetricsServiceClient(conn) 219 | empty := &metricsService.ExportMetricsServiceRequest{} 220 | 221 | _, err = service.Export(c.grpcMetadata(ctx), empty) 222 | if err == nil { 223 | return nil 224 | } 225 | } 226 | 227 | select { 228 | case <-ctx.Done(): 229 | return ctx.Err() 230 | default: 231 | if isRecoverable(err) { 232 | level.Info(c.logger).Log("msg", "selftest recoverable error, still trying", "err", err) 233 | continue 234 | } 235 | } 236 | return fmt.Errorf( 237 | "non-recoverable failure in selftest: %s", 238 | truncateErrorString(err), 239 | ) 240 | } 241 | } 242 | 243 | // Store sends a batch of samples to the endpoint. 244 | func (c *Client) Store(req *metricsService.ExportMetricsServiceRequest) error { 245 | tss := req.ResourceMetrics 246 | if len(tss) == 0 { 247 | // Nothing to do, return silently. 248 | return nil 249 | } 250 | 251 | // Note the call to getConnection() applies its own timeout for Dial(). 
252 | ctx := context.Background() 253 | conn, err := c.getConnection(ctx) 254 | if err != nil { 255 | return err 256 | } 257 | 258 | ctx, cancel := context.WithTimeout(ctx, c.timeout) 259 | defer cancel() 260 | 261 | service := metricsService.NewMetricsServiceClient(conn) 262 | 263 | var md grpcMetadata.MD 264 | defer exportDuration.Start(ctx).Stop(&err) 265 | 266 | if _, err = service.Export(c.grpcMetadata(ctx), req, grpc.Trailer(&md)); err != nil { 267 | doevery.TimePeriod(config.DefaultNoisyLogPeriod, func() { 268 | level.Error(c.logger).Log( 269 | "msg", "export failure", 270 | "err", truncateErrorString(err), 271 | "size", proto.Size(req), 272 | "trailers", fmt.Sprint(md), 273 | "recoverable", isRecoverable(err), 274 | ) 275 | }) 276 | return err 277 | } 278 | // Note: Lightstep uses gRPC response Trailers to return 279 | // information about validation errors following a successful 280 | // Export when any points or metrics were dropped. 281 | c.parseResponseMetadata(ctx, md) 282 | 283 | doevery.TimePeriod(config.DefaultNoisyLogPeriod, func() { 284 | level.Debug(c.logger).Log( 285 | "msg", "successful write", 286 | "size", proto.Size(req), 287 | "trailers", fmt.Sprint(md), 288 | ) 289 | }) 290 | return nil 291 | } 292 | 293 | func singleCount(values []string) (int, error) { 294 | if len(values) != 1 { 295 | return 0, errNoSingleCount 296 | } 297 | return strconv.Atoi(values[0]) 298 | } 299 | 300 | func (c *Client) parseResponseMetadata(ctx context.Context, md grpcMetadata.MD) { 301 | for key, values := range md { 302 | key = strings.ToLower(key) 303 | if !strings.HasPrefix(key, "otlp-") { 304 | continue 305 | } 306 | if key == "otlp-points-dropped" { 307 | if points, err := singleCount(values); err == nil { 308 | common.DroppedPoints.Add( 309 | ctx, 310 | int64(points), 311 | common.ReasonKey.String("validation"), 312 | ) 313 | } 314 | } else if key == "otlp-metrics-dropped" { 315 | if points, err := singleCount(values); err == nil { 316 | 
common.DroppedSeries.Add( 317 | ctx, 318 | int64(points), 319 | common.ReasonKey.String("validation"), 320 | ) 321 | } 322 | } else if strings.HasPrefix(key, invalidTrailerPrefix) { 323 | key = key[len(invalidTrailerPrefix):] 324 | for _, metricName := range values { 325 | c.invalidSet.Set(key, metricName) 326 | } 327 | } else { 328 | doevery.TimePeriod(config.DefaultNoisyLogPeriod, func() { 329 | level.Info(c.logger).Log( 330 | "msg", "unrecognized trailer", 331 | "key", key, 332 | "values", fmt.Sprint(values), 333 | ) 334 | }) 335 | } 336 | } 337 | } 338 | 339 | func (c *Client) Close() error { 340 | if c.conn == nil { 341 | return nil 342 | } 343 | return c.conn.Close() 344 | } 345 | 346 | func (c *Client) grpcMetadata(ctx context.Context) context.Context { 347 | return grpcMetadata.NewOutgoingContext(ctx, c.headers) 348 | } 349 | -------------------------------------------------------------------------------- /otlp/client_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package otlp 15 | 16 | import ( 17 | "net/url" 18 | "testing" 19 | "time" 20 | 21 | "github.com/stretchr/testify/require" 22 | metricsService "go.opentelemetry.io/proto/otlp/collector/metrics/v1" 23 | metric_pb "go.opentelemetry.io/proto/otlp/metrics/v1" 24 | "golang.org/x/net/nettest" 25 | "google.golang.org/grpc" 26 | ) 27 | 28 | func TestStoreErrorHandlingOnTimeout(t *testing.T) { 29 | listener, err := nettest.NewLocalListener("tcp") 30 | require.NoError(t, err) 31 | grpcServer := grpc.NewServer() 32 | metricsService.RegisterMetricsServiceServer(grpcServer, &metricServiceServer{ 33 | status: nil, 34 | }) 35 | go grpcServer.Serve(listener) 36 | defer grpcServer.Stop() 37 | 38 | serverURL, err := url.Parse("https://" + listener.Addr().String()) 39 | if err != nil { 40 | t.Fatal(err) 41 | } 42 | 43 | c := NewClient(ClientConfig{ 44 | URL: serverURL, 45 | Timeout: 0, // Immeditate Timeout. 46 | }) 47 | err = c.Store(&metricsService.ExportMetricsServiceRequest{ 48 | ResourceMetrics: []*metric_pb.ResourceMetrics{ 49 | {}, 50 | }, 51 | }) 52 | require.True(t, isRecoverable(err), "expected recoverableError in error %v", err) 53 | } 54 | 55 | func TestEmptyRequest(t *testing.T) { 56 | serverURL, err := url.Parse("http://localhost:12345") 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | c := NewClient(ClientConfig{ 61 | URL: serverURL, 62 | Timeout: time.Second, 63 | }) 64 | if err := c.Store(&metricsService.ExportMetricsServiceRequest{}); err != nil { 65 | t.Fatal(err) 66 | } 67 | } 68 | 69 | // Note: There is no test that the client correctly chooses the 70 | // correct branch after the call to service.Export in Client.Store(). 71 | // This is deficient, however we are planning to replace this code 72 | // with the OTel-Go OTLP Exporter, after which such a test would have 73 | // to be rewritten from scratch. 
// ewmaRate tracks an exponentially weighted moving average of a per-second rate.
type ewmaRate struct {
	// newEvents is the number of events recorded since the last tick.
	// It is only accessed through sync/atomic.
	newEvents int64
	alpha     float64
	interval  time.Duration
	lastRate  float64 // read and written under mutex
	init      bool    // set by the first tick; read and written under mutex
	mutex     sync.Mutex
}

// newEWMARate always allocates a new ewmaRate, as this guarantees the atomically
// accessed int64 will be aligned on ARM. See prometheus#2666.
func newEWMARate(alpha float64, interval time.Duration) *ewmaRate {
	return &ewmaRate{
		alpha:    alpha,
		interval: interval,
	}
}

// rate returns the per-second rate as of the most recent tick. It is
// zero until tick has been called at least once.
func (r *ewmaRate) rate() float64 {
	r.mutex.Lock()
	defer r.mutex.Unlock()
	return r.lastRate
}

// tick folds the events recorded since the previous tick into the
// moving average. It assumes it is called every r.interval.
func (r *ewmaRate) tick() {
	// Take and reset the counter in one atomic step; increments that
	// arrive afterwards are counted by the next tick.
	newEvents := atomic.SwapInt64(&r.newEvents, 0)
	instantRate := float64(newEvents) / r.interval.Seconds()

	r.mutex.Lock()
	defer r.mutex.Unlock()

	if !r.init {
		// The first observation seeds the average directly.
		r.init = true
		r.lastRate = instantRate
		return
	}
	r.lastRate += r.alpha * (instantRate - r.lastRate)
}

// incr adds incr events to the count consumed by the next tick.
func (r *ewmaRate) incr(incr int64) {
	atomic.AddInt64(&r.newEvents, incr)
}
12 | */ 13 | package otlp 14 | 15 | import ( 16 | "context" 17 | 18 | metricsService "go.opentelemetry.io/proto/otlp/collector/metrics/v1" 19 | "google.golang.org/grpc/status" 20 | ) 21 | 22 | type metricServiceServer struct { 23 | status *status.Status 24 | metricsService.UnimplementedMetricsServiceServer 25 | } 26 | 27 | func (s *metricServiceServer) Export(ctx context.Context, req *metricsService.ExportMetricsServiceRequest) (*metricsService.ExportMetricsServiceResponse, error) { 28 | var emptyValue = metricsService.ExportMetricsServiceResponse{} 29 | 30 | if s.status == nil { 31 | return &emptyValue, nil 32 | } 33 | 34 | return nil, s.status.Err() 35 | } 36 | -------------------------------------------------------------------------------- /otlp/writer.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Google Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package otlp 18 | 19 | import ( 20 | "io" 21 | 22 | "github.com/go-kit/kit/log" 23 | "github.com/go-kit/kit/log/level" 24 | "github.com/golang/protobuf/proto" 25 | metricsService "go.opentelemetry.io/proto/otlp/collector/metrics/v1" 26 | ) 27 | 28 | // ExportMetricsRequestWriterCloser allows writing protobuf message 29 | // monitoring.ExportMetricsRequest as wire format into the writerCloser. 
30 | type ExportMetricsRequestWriterCloser struct { 31 | logger log.Logger 32 | writeCloser io.WriteCloser 33 | } 34 | 35 | func NewExportMetricsServiceRequestWriterCloser(writeCloser io.WriteCloser, logger log.Logger) *ExportMetricsRequestWriterCloser { 36 | if logger == nil { 37 | logger = log.NewNopLogger() 38 | } 39 | return &ExportMetricsRequestWriterCloser{ 40 | writeCloser: writeCloser, 41 | logger: logger, 42 | } 43 | } 44 | 45 | // Store writes protobuf message monitoring.ExportMetricsRequest as wire 46 | // format into the writeCloser. 47 | func (c *ExportMetricsRequestWriterCloser) Store(req *metricsService.ExportMetricsServiceRequest) error { 48 | data, err := proto.Marshal(req) 49 | if err != nil { 50 | level.Warn(c.logger).Log( 51 | "msg", "failure marshaling ExportMetricsRequest.", 52 | "err", err) 53 | return err 54 | } 55 | _, err = c.writeCloser.Write(data) 56 | if err != nil { 57 | level.Warn(c.logger).Log( 58 | "msg", "failure writing data to file.", 59 | "err", err) 60 | return err 61 | } 62 | return nil 63 | } 64 | 65 | func (c *ExportMetricsRequestWriterCloser) Close() error { 66 | return c.writeCloser.Close() 67 | } 68 | -------------------------------------------------------------------------------- /otlp/writer_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Google Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package otlp 18 | 19 | import ( 20 | "bytes" 21 | "testing" 22 | 23 | "github.com/go-kit/kit/log" 24 | "github.com/golang/protobuf/proto" 25 | metricsService "go.opentelemetry.io/proto/otlp/collector/metrics/v1" 26 | metric_pb "go.opentelemetry.io/proto/otlp/metrics/v1" 27 | ) 28 | 29 | type myWriterCloser struct { 30 | Buffer bytes.Buffer 31 | } 32 | 33 | func (m *myWriterCloser) Write(p []byte) (int, error) { 34 | return m.Buffer.Write(p) 35 | } 36 | 37 | func (m *myWriterCloser) Close() error { 38 | m.Buffer.Reset() 39 | return nil 40 | } 41 | 42 | func TestRequest(t *testing.T) { 43 | var m myWriterCloser 44 | c := NewExportMetricsServiceRequestWriterCloser(&m, log.NewNopLogger()) 45 | defer c.Close() 46 | req := &metricsService.ExportMetricsServiceRequest{ 47 | ResourceMetrics: []*metric_pb.ResourceMetrics{ 48 | &metric_pb.ResourceMetrics{}, 49 | }, 50 | } 51 | if err := c.Store(req); err != nil { 52 | t.Fatal(err) 53 | } 54 | 55 | storedReq := &metricsService.ExportMetricsServiceRequest{} 56 | err := proto.Unmarshal(m.Buffer.Bytes(), storedReq) 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | if !proto.Equal(req, storedReq) { 61 | t.Errorf("Expect requests as %v, but stored as: %v", req, storedReq) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /package.go: -------------------------------------------------------------------------------- 1 | package sidecar 2 | 3 | import ( 4 | "github.com/prometheus/common/version" 5 | "go.opentelemetry.io/otel/metric" 6 | metricotel "go.opentelemetry.io/otel/metric/global" 7 | ) 8 | 9 | const ( 10 | // Data exported from Prometheus are recored as: 11 | ExportInstrumentationLibrary = "prometheus-sidecar" 12 | 13 | // Diagnostics about this process are recorded as: 14 | SelfInstrumentationLibrary = "github.com/lightstep/opentelemetry-prometheus-sidecar" 15 | ) 16 | 17 | var ( 18 | OTelMeter = metricotel.Meter( 19 | SelfInstrumentationLibrary, 20 | 
metric.WithInstrumentationVersion(version.Version), 21 | ) 22 | 23 | OTelMeterMust = metric.Must(OTelMeter) 24 | ) 25 | -------------------------------------------------------------------------------- /prometheus/monitor.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/http" 7 | "path" 8 | "sync" 9 | 10 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 11 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 12 | dto "github.com/prometheus/client_model/go" 13 | "github.com/prometheus/prom2json" 14 | promconfig "github.com/prometheus/prometheus/config" 15 | "github.com/prometheus/prometheus/pkg/labels" 16 | ) 17 | 18 | var monitorDuration = telemetry.NewTimer( 19 | "sidecar.monitor.duration", 20 | "duration of the /metrics scrape used to monitor Prometheus", 21 | ) 22 | 23 | // copied from prom2json 24 | const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3` 25 | 26 | type ( 27 | Monitor struct { 28 | cfg config.PromReady 29 | globalConfig promconfig.GlobalConfig 30 | scrapeConfig []*promconfig.ScrapeConfig 31 | } 32 | 33 | Family struct { 34 | family *dto.MetricFamily 35 | } 36 | 37 | SummaryFamily struct { 38 | family *dto.MetricFamily 39 | } 40 | 41 | Result struct { 42 | values map[string]*dto.MetricFamily 43 | } 44 | 45 | Summary struct { 46 | summary *dto.Summary 47 | } 48 | ) 49 | 50 | func NewMonitor(cfg config.PromReady) *Monitor { 51 | return &Monitor{ 52 | cfg: cfg, 53 | } 54 | } 55 | 56 | func (m *Monitor) GetMetrics(ctx context.Context) (_ Result, retErr error) { 57 | var ( 58 | wg sync.WaitGroup 59 | ch = make(chan *dto.MetricFamily) 60 | res = Result{ 61 | values: map[string]*dto.MetricFamily{}, 62 | } 63 | ) 64 | 65 | scrape := *m.cfg.PromURL 66 | scrape.Path = path.Join(scrape.Path, "/metrics") 67 | target := 
scrape.String() 68 | 69 | defer monitorDuration.Start(context.Background()).Stop(&retErr) 70 | defer wg.Wait() 71 | 72 | wg.Add(1) 73 | 74 | go func() { 75 | defer wg.Done() 76 | for mfam := range ch { 77 | res.values[mfam.GetName()] = mfam 78 | } 79 | }() 80 | 81 | // Note: copied from FetchMetricFamilies, Context added; this code path closes `ch`. 82 | req, err := http.NewRequestWithContext(ctx, "GET", target, nil) 83 | if err != nil { 84 | close(ch) 85 | return Result{}, fmt.Errorf("creating GET request for URL %q failed: %v", target, err) 86 | } 87 | req.Header.Add("Accept", acceptHeader) 88 | resp, err := http.DefaultClient.Do(req) 89 | if err != nil { 90 | close(ch) 91 | return Result{}, fmt.Errorf("executing GET request for URL %q failed: %v", target, err) 92 | } 93 | defer resp.Body.Close() 94 | if resp.StatusCode != http.StatusOK { 95 | close(ch) 96 | return Result{}, fmt.Errorf("GET request for URL %q returned HTTP status %s", target, resp.Status) 97 | } 98 | return res, prom2json.ParseResponse(resp, ch) 99 | } 100 | 101 | func (r Result) Counter(name string) Family { 102 | f := r.values[name] 103 | if f.GetType() != dto.MetricType_COUNTER { 104 | return Family{} 105 | } 106 | return Family{f} 107 | } 108 | 109 | func (r Result) Gauge(name string) Family { 110 | f := r.values[name] 111 | if f.GetType() != dto.MetricType_GAUGE { 112 | return Family{} 113 | } 114 | return Family{f} 115 | } 116 | 117 | func (r Result) Summary(name string) SummaryFamily { 118 | f := r.values[name] 119 | if f.GetType() != dto.MetricType_SUMMARY { 120 | return SummaryFamily{} 121 | } 122 | return SummaryFamily{f} 123 | } 124 | 125 | func exactMatch(query map[string]string, ls []*dto.LabelPair) bool { 126 | if len(ls) != len(query) { 127 | return false 128 | } 129 | for _, l := range ls { 130 | if l == nil || l.Name == nil || l.Value == nil { 131 | return false 132 | } 133 | if query[*l.Name] != *l.Value { 134 | return false 135 | } 136 | } 137 | return true 138 | } 139 | 140 | 
func (f Family) For(ls labels.Labels) float64 { 141 | if f.family == nil { 142 | return 0 143 | } 144 | match := ls.Map() 145 | for _, m := range f.family.Metric { 146 | switch f.family.GetType() { 147 | case dto.MetricType_COUNTER: 148 | if m.Counter != nil && m.Counter.Value != nil && exactMatch(match, m.Label) { 149 | return *m.Counter.Value 150 | } 151 | case dto.MetricType_GAUGE: 152 | if m.Gauge != nil && m.Gauge.Value != nil && exactMatch(match, m.Label) { 153 | return *m.Gauge.Value 154 | } 155 | } 156 | } 157 | return 0 158 | } 159 | 160 | // AllLabels returns the set of labels present for this family of 161 | // Summaries. 162 | func (f SummaryFamily) AllLabels() []labels.Labels { 163 | if f.family == nil { 164 | return nil 165 | } 166 | var res []labels.Labels 167 | for _, m := range f.family.Metric { 168 | var ll labels.Labels 169 | for _, lp := range m.Label { 170 | ll = append(ll, labels.Label{ 171 | Name: *lp.Name, 172 | Value: *lp.Value, 173 | }) 174 | } 175 | res = append(res, ll) 176 | } 177 | return res 178 | } 179 | 180 | func (f SummaryFamily) For(ls labels.Labels) Summary { 181 | if f.family == nil { 182 | return Summary{} 183 | } 184 | match := ls.Map() 185 | for _, m := range f.family.Metric { 186 | if f.family.GetType() != dto.MetricType_SUMMARY { 187 | continue 188 | } 189 | if !exactMatch(match, m.Label) { 190 | continue 191 | } 192 | return Summary{ 193 | summary: m.Summary, 194 | } 195 | } 196 | return Summary{} 197 | } 198 | 199 | func (s Summary) Count() uint64 { 200 | if s.summary == nil { 201 | return 0 202 | } 203 | return *s.summary.SampleCount 204 | } 205 | 206 | // AllLabels returns the set of labels present for this family 207 | func (f Family) AllLabels() []labels.Labels { 208 | if f.family == nil { 209 | return nil 210 | } 211 | var res []labels.Labels 212 | for _, m := range f.family.Metric { 213 | var ll labels.Labels 214 | for _, lp := range m.Label { 215 | ll = append(ll, labels.Label{ 216 | Name: *lp.Name, 217 | Value: 
*lp.Value, 218 | }) 219 | } 220 | res = append(res, ll) 221 | } 222 | return res 223 | } 224 | -------------------------------------------------------------------------------- /prometheus/monitor_test.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "net/http/httptest" 7 | "net/url" 8 | "testing" 9 | 10 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 11 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 12 | "github.com/prometheus/prometheus/pkg/labels" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func TestMonitorScrape(t *testing.T) { 17 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 18 | _, err := w.Write([]byte(` 19 | # HELP http_requests_total The total number of HTTP requests. 20 | # TYPE http_requests_total counter 21 | http_requests_total{method="post",code="200"} 1027 1395066363000 22 | http_requests_total{method="post",code="400"} 3 1395066363000 23 | 24 | # HELP blah blah 25 | # TYPE utilization gauge 26 | utilization{} 123 27 | `)) 28 | require.NoError(t, err) 29 | })) 30 | 31 | tu, err := url.Parse(ts.URL) 32 | require.NoError(t, err) 33 | 34 | m := NewMonitor(config.PromReady{ 35 | Logger: telemetry.DefaultLogger(), 36 | PromURL: tu, 37 | }) 38 | 39 | ctx := context.Background() 40 | res, err := m.GetMetrics(ctx) 41 | require.NoError(t, err) 42 | 43 | // Positive examples 44 | require.Equal(t, 1027.0, res.Counter("http_requests_total"). 45 | For(labels.FromStrings("method", "post", "code", "200"))) 46 | 47 | require.Equal(t, 3.0, res.Counter("http_requests_total"). 48 | For(labels.FromStrings("method", "post", "code", "400"))) 49 | 50 | require.Equal(t, 123.0, res.Gauge("utilization"). 51 | For(labels.FromStrings())) 52 | 53 | // Negative examples 54 | require.Equal(t, 0.0, res.Counter("http_requests_total"). 
55 | For(labels.FromStrings("method", "post", "code", "400", "user", "nobody"))) 56 | 57 | require.Equal(t, 0.0, res.Counter("http_requests_total"). 58 | For(labels.FromStrings("method", "post", "code", "500"))) 59 | 60 | require.Equal(t, 0.0, res.Counter("other_requests_total"). 61 | For(labels.FromStrings("method", "post", "code", "400"))) 62 | 63 | } 64 | -------------------------------------------------------------------------------- /prometheus/ready.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "path" 9 | "time" 10 | 11 | "github.com/go-kit/kit/log/level" 12 | goversion "github.com/hashicorp/go-version" 13 | "github.com/lightstep/opentelemetry-prometheus-sidecar/common" 14 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 15 | "github.com/pkg/errors" 16 | promconfig "github.com/prometheus/prometheus/config" 17 | "gopkg.in/yaml.v2" 18 | ) 19 | 20 | const ( 21 | scrapeIntervalName = config.PrometheusTargetIntervalLengthName 22 | ) 23 | 24 | func (m *Monitor) scrapeMetrics(inCtx context.Context) (Result, error) { 25 | ctx, cancel := context.WithTimeout(inCtx, m.cfg.HealthCheckRequestTimeout) 26 | defer cancel() 27 | 28 | return m.GetMetrics(ctx) 29 | } 30 | 31 | func (m *Monitor) checkPrometheusVersion(res Result) error { 32 | minVersion, _ := goversion.NewVersion(config.PrometheusMinVersion) 33 | var prometheusVersion *goversion.Version 34 | err := errors.New("version not found") 35 | for _, lp := range res.Gauge(config.PrometheusBuildInfoName).AllLabels() { 36 | if len(lp.Get("version")) > 0 { 37 | prometheusVersion, err = goversion.NewVersion(lp.Get("version")) 38 | break 39 | } 40 | } 41 | 42 | if err != nil { 43 | return errors.Wrap(err, "prometheus version unavailable") 44 | } 45 | 46 | if prometheusVersion.LessThan(minVersion) { 47 | return errors.Errorf("prometheus version %s+ required, detected: 
%s", minVersion, prometheusVersion) 48 | } 49 | return nil 50 | } 51 | 52 | func (m *Monitor) scrapeIntervals(promcfg promconfig.Config) []time.Duration { 53 | ds := map[time.Duration]struct{}{} 54 | for _, sc := range promcfg.ScrapeConfigs { 55 | si := sc.ScrapeInterval 56 | if si == 0 { 57 | si = promcfg.GlobalConfig.ScrapeInterval 58 | } 59 | ds[time.Duration(si)] = struct{}{} 60 | } 61 | var res []time.Duration 62 | for d := range ds { 63 | res = append(res, d) 64 | } 65 | return res 66 | } 67 | 68 | func (m *Monitor) completedFirstScrapes(res Result, promcfg promconfig.Config, prometheusStartTime time.Time) error { 69 | scrapeIntervals := m.scrapeIntervals(promcfg) 70 | 71 | summary := res.Summary(scrapeIntervalName) 72 | foundLabelSets := summary.AllLabels() 73 | if len(foundLabelSets) == 0 { 74 | return errors.New("waiting for the first scrape(s) to complete") 75 | } 76 | 77 | // Prometheus doesn't report zero counts. We expect absent 78 | // timeseries, not zero counts, but we test for Count() != 0 on 79 | // the retrieved metrics for added safety below. 80 | 81 | if len(scrapeIntervals) == 0 { 82 | // If no intervals are configured, wait for the first one. 83 | for _, ls := range foundLabelSets { 84 | if summary.For(ls).Count() != 0 { 85 | return nil 86 | } 87 | } 88 | 89 | return nil 90 | } 91 | 92 | // Find all the known intervals. 
93 | foundWhich := map[string]bool{} 94 | for _, ls := range foundLabelSets { 95 | for _, l := range ls { 96 | if l.Name == "interval" && summary.For(ls).Count() != 0 { 97 | foundWhich[l.Value] = true 98 | break 99 | } 100 | } 101 | } 102 | 103 | for _, si := range scrapeIntervals { 104 | ts := si.String() 105 | if !foundWhich[ts] { 106 | // have we waited the max amount of time before moving on 107 | if time.Since(prometheusStartTime) > si+config.DefaultScrapeIntervalWaitPeriod { 108 | level.Warn(m.cfg.Logger).Log("msg", "waited until deadline for scrape interval", "missing-interval", ts) 109 | break 110 | } else { 111 | return errors.Errorf("waiting for scrape interval %s", ts) 112 | } 113 | } 114 | } 115 | 116 | return nil 117 | } 118 | 119 | func (m *Monitor) getConfig(inCtx context.Context) (promconfig.Config, error) { 120 | ctx, cancel := context.WithTimeout(inCtx, m.cfg.HealthCheckRequestTimeout) 121 | defer cancel() 122 | 123 | return m.GetConfig(ctx) 124 | } 125 | 126 | func (m *Monitor) GetConfig(ctx context.Context) (promconfig.Config, error) { 127 | endpoint := *m.cfg.PromURL 128 | endpoint.Path = path.Join(endpoint.Path, config.PrometheusConfigEndpointPath) 129 | target := endpoint.String() 130 | 131 | req, err := http.NewRequestWithContext(ctx, "GET", target, nil) 132 | if err != nil { 133 | return promconfig.Config{}, fmt.Errorf("config request: %v", err) 134 | } 135 | resp, err := http.DefaultClient.Do(req) 136 | if err != nil { 137 | return promconfig.Config{}, fmt.Errorf("config get: %v", err) 138 | } 139 | defer resp.Body.Close() 140 | if resp.StatusCode != http.StatusOK { 141 | return promconfig.Config{}, fmt.Errorf("config get status %s", resp.Status) 142 | } 143 | 144 | var apiResp common.ConfigAPIResponse 145 | if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { 146 | return promconfig.Config{}, errors.Wrap(err, "config json decode") 147 | } 148 | 149 | // This sets the default interval as Prometheus would. 
// WaitForReady blocks until Prometheus is considered ready or inCtx ends.
//
// Each attempt sends a GET to the "/-/ready" path under the configured
// Prometheus URL. On a 2xx response it then scrapes Prometheus' metrics,
// checks the Prometheus version, fetches the Prometheus configuration
// (storing its global and scrape sections on the Monitor), and finally
// requires that the first round of scrapes has completed.
//
// A failed version check or a failed configuration fetch is treated as
// unrecoverable: inCtxCancel is called, which makes the wait below return
// inCtx.Err(). Any other failure is retried on the next tick of a ticker
// whose period is m.cfg.HealthCheckRequestTimeout.
//
// It returns nil on success, inCtx.Err() when inCtx is done, or a wrapped
// error if the readiness request cannot be constructed.
func (m *Monitor) WaitForReady(inCtx context.Context, inCtxCancel context.CancelFunc) error {
	// Work on a copy so the configured URL is not modified.
	u := *m.cfg.PromURL
	u.Path = path.Join(u.Path, "/-/ready")

	// warnSkipped prevents logging on the first failure, since we
	// will try again and this lets us avoid the first sleep
	warnSkipped := false

	tick := time.NewTicker(m.cfg.HealthCheckRequestTimeout)
	defer tick.Stop()

	for {
		// Each readiness request gets its own timeout derived from inCtx.
		ctx, cancel := context.WithTimeout(inCtx, m.cfg.HealthCheckRequestTimeout)
		req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
		if err != nil {
			cancel()
			return errors.Wrap(err, "build request")
		}

		// The attempt runs in a closure so that the deferred cancel and
		// body close fire at the end of every iteration.
		success := func() bool {
			defer cancel()
			resp, err := http.DefaultClient.Do(req)

			if resp != nil && resp.Body != nil {
				defer resp.Body.Close()
			}

			respOK := err == nil && resp.StatusCode/100 == 2

			if respOK {
				var result Result
				result, err = m.scrapeMetrics(inCtx)
				if err != nil {
					// Retried on the next tick; nothing is logged here.
					return false
				}
				err = m.checkPrometheusVersion(result)
				if err != nil {
					// invalid prometheus version is unrecoverable
					// cancel the caller's context and exit
					level.Warn(m.cfg.Logger).Log("msg", "invalid Prometheus version", "err", err)
					inCtxCancel()
					return false
				}
				var promCfg promconfig.Config
				promCfg, err = m.getConfig(inCtx)
				if err != nil {
					// Any getConfig error is handled like an invalid
					// configuration: cancel the caller's context.
					level.Warn(m.cfg.Logger).Log("msg", "invalid Prometheus config", "err", err)
					inCtxCancel()
					return false
				}
				m.globalConfig = promCfg.GlobalConfig
				m.scrapeConfig = promCfg.ScrapeConfigs

				// Great! We also need it to have completed
				// a full round of scrapes.
				if err = m.completedFirstScrapes(result, promCfg, m.cfg.StartupDelayEffectiveStartTime); err == nil {
					return true
				}
			}

			// The very first failure is not logged.
			if !warnSkipped {
				warnSkipped = true
				return false
			}
			// Either the request itself failed, or it succeeded and a
			// later step left err set; otherwise the status was non-2xx.
			if respOK || err != nil {
				level.Warn(m.cfg.Logger).Log("msg", "Prometheus readiness", "err", err)
			} else {
				level.Warn(m.cfg.Logger).Log("msg", "Prometheus is not ready", "status", resp.Status)
			}
			return false
		}()
		if success {
			return nil
		}

		// Wait for the next tick, or stop when the caller's context ends
		// (including when it was cancelled by the closure above).
		select {
		case <-inCtx.Done():
			return inCtx.Err()
		case <-tick.C:
			continue
		}
	}
}
promtest.NewFakePrometheus(promtest.Config{Version: "2.1.0"}) 29 | 30 | ctx, cancel := context.WithTimeout(context.Background(), config.DefaultHealthCheckTimeout*4/3) 31 | defer cancel() 32 | err := NewMonitor(fs.ReadyConfig()).WaitForReady(ctx, cancel) 33 | require.Error(t, err) 34 | require.Equal(t, context.Canceled, err) 35 | } 36 | 37 | func TestSlowStart(t *testing.T) { 38 | if testing.Short() { 39 | t.Skip("skipping test in short mode.") 40 | } 41 | fs := promtest.NewFakePrometheus(promtest.Config{}) 42 | fs.SetReady(true) 43 | fs.SetIntervals() 44 | 45 | go func() { 46 | time.Sleep(time.Second * 3) 47 | fs.SetIntervals(30) 48 | }() 49 | ctx, cancel := context.WithTimeout(context.Background(), config.DefaultHealthCheckTimeout*4/3) 50 | defer cancel() 51 | require.NoError(t, NewMonitor(fs.ReadyConfig()).WaitForReady(ctx, cancel)) 52 | } 53 | 54 | func TestNotReady(t *testing.T) { 55 | if testing.Short() { 56 | t.Skip("skipping test in short mode.") 57 | } 58 | fs := promtest.NewFakePrometheus(promtest.Config{}) 59 | fs.SetReady(false) 60 | 61 | ctx, cancel := context.WithTimeout(context.Background(), 2*config.DefaultHealthCheckTimeout) 62 | defer cancel() 63 | err := NewMonitor(fs.ReadyConfig()).WaitForReady(ctx, cancel) 64 | require.Error(t, err) 65 | require.Equal(t, context.DeadlineExceeded, err) 66 | } 67 | 68 | func TestReadyFail(t *testing.T) { 69 | if testing.Short() { 70 | t.Skip("skipping test in short mode.") 71 | } 72 | 73 | tu, err := url.Parse("http://127.0.0.1:9999/__notfound__") 74 | require.NoError(t, err) 75 | 76 | ctx, cancel := context.WithTimeout(context.Background(), 2*config.DefaultHealthCheckTimeout) 77 | defer cancel() 78 | err = NewMonitor(config.PromReady{ 79 | Logger: logger, 80 | PromURL: tu, 81 | HealthCheckRequestTimeout: config.DefaultHealthCheckTimeout, 82 | }).WaitForReady(ctx, cancel) 83 | require.Error(t, err) 84 | } 85 | 86 | func TestReadyCancel(t *testing.T) { 87 | fs := promtest.NewFakePrometheus(promtest.Config{}) 88 | 
89 | ctx, cancel := context.WithCancel(context.Background()) 90 | cancel() // immediate 91 | err := NewMonitor(fs.ReadyConfig()).WaitForReady(ctx, cancel) 92 | 93 | require.Error(t, err) 94 | require.Equal(t, context.Canceled, err) 95 | } 96 | 97 | func TestReadySpecificInterval(t *testing.T) { 98 | if testing.Short() { 99 | t.Skip("skipping test in short mode.") 100 | } 101 | fs := promtest.NewFakePrometheus(promtest.Config{}) 102 | fs.SetIntervals() // None set 103 | fs.SetPromConfigYaml(` 104 | scrape_configs: 105 | - job_name: 'long' 106 | scrape_interval: 79s 107 | static_configs: 108 | - targets: ['localhost:18000'] 109 | `) 110 | 111 | const interval = 79 * time.Second 112 | 113 | checker := NewMonitor(fs.ReadyConfig()) 114 | 115 | go func() { 116 | time.Sleep(5 * time.Second) 117 | fs.SetIntervals(interval) 118 | }() 119 | ctx, cancel := context.WithCancel(context.Background()) 120 | err := checker.WaitForReady(ctx, cancel) 121 | 122 | require.NoError(t, err) 123 | } 124 | 125 | func TestReadySpecificNoInterval(t *testing.T) { 126 | if testing.Short() { 127 | t.Skip("skipping test in short mode.") 128 | } 129 | fs := promtest.NewFakePrometheus(promtest.Config{}) 130 | fs.SetIntervals() // None set 131 | fs.SetPromConfigYaml("") 132 | 133 | checker := NewMonitor(fs.ReadyConfig()) 134 | 135 | go func() { 136 | time.Sleep(5 * time.Second) 137 | fs.SetIntervals(88 * time.Second) // unknown to the sidecar; not default 138 | }() 139 | 140 | ctx, cancel := context.WithCancel(context.Background()) 141 | defer cancel() 142 | err := checker.WaitForReady(ctx, cancel) 143 | 144 | require.NoError(t, err) 145 | } 146 | 147 | func TestReadySpecificIntervalWait(t *testing.T) { 148 | if testing.Short() { 149 | t.Skip("skipping test in short mode.") 150 | } 151 | fs := promtest.NewFakePrometheus(promtest.Config{}) 152 | fs.SetIntervals(19 * time.Second) // Not the one we want 153 | 154 | // Configure 19s and 79s intervals 155 | fs.SetPromConfigYaml(` 156 | scrape_configs: 
157 | - job_name: 'short' 158 | scrape_interval: 19s 159 | static_configs: 160 | - targets: ['localhost:18001'] 161 | - job_name: 'long' 162 | scrape_interval: 79s 163 | static_configs: 164 | - targets: ['localhost:18000'] 165 | `) 166 | 167 | checker := NewMonitor(fs.ReadyConfig()) 168 | checker.cfg.StartupDelayEffectiveStartTime = time.Now() 169 | 170 | ctx, cancel := context.WithTimeout(context.Background(), config.DefaultHealthCheckTimeout*4/3) 171 | defer cancel() 172 | err := checker.WaitForReady(ctx, cancel) 173 | 174 | require.Error(t, err) 175 | require.Equal(t, context.DeadlineExceeded, err) 176 | } 177 | 178 | func TestStartupDelayEffectiveStartTime(t *testing.T) { 179 | if testing.Short() { 180 | t.Skip("skipping test in short mode.") 181 | } 182 | fs := promtest.NewFakePrometheus(promtest.Config{}) 183 | fs.SetIntervals(19 * time.Second) // Not the one we want 184 | 185 | // Configure 19s and 79s intervals 186 | fs.SetPromConfigYaml(` 187 | scrape_configs: 188 | - job_name: 'short' 189 | scrape_interval: 19s 190 | static_configs: 191 | - targets: ['localhost:18001'] 192 | - job_name: 'long' 193 | scrape_interval: 79s 194 | static_configs: 195 | - targets: ['localhost:18000'] 196 | `) 197 | 198 | checker := NewMonitor(fs.ReadyConfig()) 199 | checker.cfg.StartupDelayEffectiveStartTime = time.Now().Add(-(79*time.Second + config.DefaultScrapeIntervalWaitPeriod)) 200 | 201 | ctx, cancel := context.WithTimeout(context.Background(), config.DefaultHealthCheckTimeout*4/3) 202 | defer cancel() 203 | err := checker.WaitForReady(ctx, cancel) 204 | 205 | require.NoError(t, err) 206 | 207 | } 208 | 209 | func TestReadyConfigParseError(t *testing.T) { 210 | fs := promtest.NewFakePrometheus(promtest.Config{}) 211 | fs.SetIntervals() // None set 212 | fs.SetPromConfigYaml("sdf") 213 | 214 | checker := NewMonitor(fs.ReadyConfig()) 215 | 216 | ctx, cancel := context.WithCancel(context.Background()) 217 | defer cancel() 218 | err := checker.WaitForReady(ctx, cancel) 219 | 
// delay carries the settings and collaborators used by
// delayNonLeaderSidecar.
type delay struct {
	// duration is the maximum time the tailer is held back.
	duration time.Duration
	// intervalCheck is the pause between successive leadership checks.
	intervalCheck time.Duration
	// lc is asked whether this sidecar is currently the leader.
	lc leader.Candidate
	// logger receives the "delaying tailer" / "resuming tailer" messages.
	logger log.Logger
}

// delayNonLeaderSidecar delays non leader sidecars to create a
// gap cover when a new leader is elected.
//
// This will delay the tailer for a maximum of delay.duration.
// After delay.intervalCheck a check to see if the sidecar has
// become the leader is done, if the sidecar is leading
// we resume the tailer otherwise we continue delaying up to
// the delay.duration.
//
// This is a safe measure to reduce the chances of creating gaps
// from the time that a sidecar steps down and another sidecar
// becomes the leader.
30 | func (d *delay) delayNonLeaderSidecar(ctx context.Context, millis int64) { 31 | if d.lc.IsLeader() { 32 | return 33 | } 34 | 35 | ts := int64(time.Duration(millis) * time.Millisecond / time.Nanosecond) 36 | timestamp := time.Unix(0, ts) 37 | if time.Since(timestamp) > d.duration { 38 | return 39 | } 40 | 41 | d.logger.Log("msg", "delaying tailer", "delay_time", d.duration) 42 | for i := 0; i < int(d.duration/d.intervalCheck); i++ { 43 | select { 44 | case <-ctx.Done(): 45 | return 46 | case <-time.After(d.intervalCheck): 47 | if d.lc.IsLeader() { 48 | d.logger.Log("msg", "resuming tailer, sidecar is now leading") 49 | return 50 | } 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /retrieval/delay_test.go: -------------------------------------------------------------------------------- 1 | package retrieval 2 | 3 | import ( 4 | "context" 5 | "github.com/go-kit/kit/log" 6 | "github.com/lightstep/opentelemetry-prometheus-sidecar/leader" 7 | "os" 8 | "sync/atomic" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | type settableLeader struct { 14 | leader atomic.Value 15 | } 16 | 17 | func (s settableLeader) Start(_ context.Context) error { 18 | return nil 19 | } 20 | 21 | func (s settableLeader) IsLeader() bool { 22 | return s.leader.Load().(bool) 23 | } 24 | 25 | var _ leader.Candidate = (*settableLeader)(nil) 26 | 27 | func TestDelayWhenLeaderShouldEndWithoutContextError(t *testing.T) { 28 | l := &settableLeader{} 29 | l.leader.Store(true) 30 | logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) 31 | 32 | d := delay{ 33 | duration: 30 * time.Second, 34 | intervalCheck: 5 * time.Second, 35 | lc: l, 36 | logger: logger, 37 | } 38 | 39 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 40 | defer cancel() 41 | 42 | d.delayNonLeaderSidecar(ctx, 0) 43 | if ctx.Err() != nil { 44 | t.Error("context should not contain error when sidecar is leader") 45 | } 46 | } 47 | 48 | func 
TestDelayWhenNotLeaderShouldEndWithContextError(t *testing.T) { 49 | l := &settableLeader{} 50 | l.leader.Store(false) 51 | logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) 52 | 53 | d := delay{ 54 | duration: time.Minute, 55 | intervalCheck: 10 * time.Millisecond, 56 | lc: l, 57 | logger: logger, 58 | } 59 | 60 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 61 | defer cancel() 62 | 63 | d.delayNonLeaderSidecar(ctx, time.Now().UnixNano()/int64(time.Millisecond)) 64 | if ctx.Err() == nil { 65 | t.Error("context should timeout when not leader") 66 | } 67 | } 68 | 69 | func TestDelayWhenNotLeaderShouldDelay(t *testing.T) { 70 | l := &settableLeader{} 71 | l.leader.Store(false) 72 | logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) 73 | 74 | d := delay{ 75 | duration: 150 * time.Millisecond, 76 | intervalCheck: 10 * time.Millisecond, 77 | lc: l, 78 | logger: logger, 79 | } 80 | 81 | ctx := context.Background() 82 | 83 | before := time.Now() 84 | d.delayNonLeaderSidecar(ctx, time.Now().UnixNano()/int64(time.Millisecond)) 85 | 86 | if delayTime := time.Since(before); delayTime < 150*time.Millisecond { 87 | t.Errorf("expected at least 150ms of delay, got %s instead", delayTime) 88 | } 89 | } 90 | 91 | func TestDelayBecomeLeaderShouldStopDelay(t *testing.T) { 92 | l := &settableLeader{} 93 | l.leader.Store(false) 94 | logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) 95 | 96 | d := delay{ 97 | duration: 5 * time.Minute, 98 | intervalCheck: 10 * time.Millisecond, 99 | lc: l, 100 | logger: logger, 101 | } 102 | 103 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 104 | defer cancel() 105 | 106 | go func() { 107 | time.Sleep(100 * time.Millisecond) 108 | l.leader.Store(true) 109 | }() 110 | 111 | d.delayNonLeaderSidecar(ctx, time.Now().UnixNano()/int64(time.Millisecond)) 112 | if ctx.Err() != nil { 113 | t.Errorf("context should not contain error when sidecar becomes leader before maximum delay 
time, got error: %v", ctx.Err()) 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /retrieval/manager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Google Inc. 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | 14 | package retrieval 15 | 16 | import ( 17 | "context" 18 | "github.com/lightstep/opentelemetry-prometheus-sidecar/leader" 19 | "io/ioutil" 20 | "os" 21 | "strings" 22 | "sync" 23 | "testing" 24 | "time" 25 | 26 | sidecar "github.com/lightstep/opentelemetry-prometheus-sidecar" 27 | "github.com/lightstep/opentelemetry-prometheus-sidecar/config" 28 | "github.com/lightstep/opentelemetry-prometheus-sidecar/internal/otlptest" 29 | "github.com/lightstep/opentelemetry-prometheus-sidecar/internal/promtest" 30 | "github.com/lightstep/opentelemetry-prometheus-sidecar/prometheus" 31 | "github.com/lightstep/opentelemetry-prometheus-sidecar/tail" 32 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 33 | "github.com/prometheus/common/version" 34 | "github.com/prometheus/prometheus/pkg/labels" 35 | "github.com/prometheus/prometheus/pkg/textparse" 36 | "github.com/prometheus/prometheus/tsdb/record" 37 | "github.com/prometheus/prometheus/tsdb/wal" 38 | "github.com/stretchr/testify/require" 39 | metric_pb "go.opentelemetry.io/proto/otlp/metrics/v1" 40 | resource_pb "go.opentelemetry.io/proto/otlp/resource/v1" 41 | ) 42 | 
43 | type nopAppender struct { 44 | lock sync.Mutex 45 | samples []SizedMetric 46 | } 47 | 48 | func (a *nopAppender) Append(s SizedMetric) { 49 | a.lock.Lock() 50 | defer a.lock.Unlock() 51 | 52 | a.samples = append(a.samples, s) 53 | } 54 | 55 | func (a *nopAppender) getSamples() []SizedMetric { 56 | a.lock.Lock() 57 | defer a.lock.Unlock() 58 | 59 | return a.samples 60 | } 61 | 62 | func TestReader_Progress(t *testing.T) { 63 | dir, err := ioutil.TempDir("", "progress") 64 | if err != nil { 65 | t.Fatal(err) 66 | } 67 | defer os.RemoveAll(dir) 68 | 69 | ctx, cancel := context.WithCancel(context.Background()) 70 | 71 | w, err := wal.New(nil, nil, dir, false) 72 | if err != nil { 73 | t.Fatal(err) 74 | } 75 | 76 | prom := promtest.NewFakePrometheus(promtest.Config{}) 77 | 78 | tailer, err := tail.Tail(ctx, telemetry.DefaultLogger(), dir, prometheus.NewMonitor(prom.ReadyConfig())) 79 | if err != nil { 80 | t.Fatal(err) 81 | } 82 | 83 | var enc record.Encoder 84 | // Write single series record that we use for all sample records. 85 | err = w.Log(enc.Series([]record.RefSeries{ 86 | {Ref: 1, Labels: labels.FromStrings("__name__", "metric1", "job", "job1", "instance", "inst1")}, 87 | }, nil)) 88 | if err != nil { 89 | t.Fatal(err) 90 | } 91 | 92 | // Populate the getters with data. 
93 | metadataMap := promtest.MetadataMap{ 94 | "job1/inst1/metric1": &config.MetadataEntry{Metric: "metric1", MetricType: textparse.MetricTypeGauge, Help: "help"}, 95 | } 96 | 97 | failingSet := testFailingReporter{} 98 | r := NewPrometheusReader(nil, dir, tailer, nil, nil, metadataMap, &nopAppender{}, "", 0, nil, failingSet, leader.NewAlwaysLeaderCandidate()) 99 | r.progressSaveInterval = 200 * time.Millisecond 100 | 101 | // Populate sample data 102 | go func() { 103 | defer cancel() 104 | writeCtx, _ := context.WithTimeout(ctx, 2*time.Second) 105 | 106 | for { 107 | select { 108 | case <-writeCtx.Done(): 109 | return 110 | default: 111 | } 112 | // Create sample batches but only populate the first sample with a valid series. 113 | // This way we write more data but only record a single signaling sample 114 | // that encodes the record's offset in its timestamp. 115 | sz, err := tailer.Size() 116 | if err != nil { 117 | t.Error(err) 118 | break 119 | } 120 | samples := make([]record.RefSample, 1000) 121 | samples[0] = record.RefSample{Ref: 1, T: int64(sz) * 1000} 122 | 123 | // Note: We must update the segment number in order for 124 | // the Tail reader to make progress. 125 | // 126 | // Note: This uses the default segment size, independent of 127 | // the actual segment size, because that's what the sidecar 128 | // uses to calculate Size(), so this expression is consistent. 129 | prom.SetSegment(sz / wal.DefaultSegmentSize) 130 | 131 | if err := w.Log(enc.Samples(samples, nil)); err != nil { 132 | t.Error(err) 133 | break 134 | } 135 | } 136 | }() 137 | // Proess the WAL until the writing goroutine completes. 138 | r.Run(ctx, 0) 139 | 140 | progressOffset, err := ReadProgressFile(dir) 141 | if err != nil { 142 | t.Fatal(err) 143 | } 144 | // We should've head enough time to have save a reasonably large offset. 
145 | if progressOffset <= 2*progressBufferMargin { 146 | t.Fatalf("saved offset too low at %d", progressOffset) 147 | } 148 | writeOffset := tailer.Offset() 149 | 150 | // Initializing a new tailer and reader should read samples again but skip those that are 151 | // below our offset. 152 | // Due to the buffer margin, we will still read some old records, but not all of them. 153 | // Thus we don't need to write any new records to verify correctness. 154 | ctx, cancel = context.WithCancel(context.Background()) 155 | defer cancel() 156 | 157 | tailer, err = tail.Tail(ctx, telemetry.DefaultLogger(), dir, prometheus.NewMonitor(prom.ReadyConfig())) 158 | if err != nil { 159 | t.Fatal(err) 160 | } 161 | 162 | recorder := &nopAppender{} 163 | 164 | r = NewPrometheusReader(nil, dir, tailer, nil, nil, metadataMap, recorder, "", 0, nil, failingSet, leader.NewAlwaysLeaderCandidate()) 165 | go r.Run(ctx, progressOffset) 166 | 167 | // Wait for reader to process until the end. 168 | ctx, _ = context.WithTimeout(ctx, 5*time.Second) 169 | for { 170 | select { 171 | case <-ctx.Done(): 172 | t.Fatal("timed out waiting for reader") 173 | default: 174 | } 175 | if tailer.Offset() >= writeOffset { 176 | break 177 | } 178 | time.Sleep(100 * time.Millisecond) 179 | } 180 | samples := recorder.getSamples() 181 | if len(samples) == 0 { 182 | t.Fatal("expected records but got none") 183 | } 184 | 185 | ctx = context.Background() 186 | 187 | for i, s := range samples { 188 | vs := otlptest.VisitorState{} 189 | vs.Visit(ctx, func( 190 | resource *resource_pb.Resource, 191 | metricName string, 192 | kind config.Kind, 193 | monotonic bool, 194 | point interface{}, 195 | ) error { 196 | nanos := point.(*metric_pb.NumberDataPoint).TimeUnixNano 197 | tseconds := time.Unix(0, int64(nanos)).Unix() 198 | 199 | if tseconds <= int64(progressOffset)-progressBufferMargin { 200 | t.Fatalf("unexpected record %d for offset %d", i, tseconds) 201 | } 202 | return nil 203 | }, resourceMetric(s.Metric())) 
204 | } 205 | 206 | require.EqualValues(t, map[string]bool{}, failingSet) 207 | } 208 | 209 | func resourceMetric(m *metric_pb.Metric) *metric_pb.ResourceMetrics { 210 | return otlptest.ResourceMetrics( 211 | otlptest.Resource(), 212 | otlptest.InstrumentationLibraryMetrics( 213 | otlptest.InstrumentationLibrary(sidecar.ExportInstrumentationLibrary, version.Version), 214 | m, 215 | ), 216 | ) 217 | } 218 | 219 | func TestReader_ProgressFile(t *testing.T) { 220 | dir, err := ioutil.TempDir("", "save_progress") 221 | if err != nil { 222 | t.Fatal(err) 223 | } 224 | defer os.RemoveAll(dir) 225 | 226 | offset, err := ReadProgressFile(dir) 227 | if err != nil { 228 | t.Fatalf("read progress: %s", err) 229 | } 230 | if offset != 0 { 231 | t.Fatalf("expected offset %d but got %d", 0, offset) 232 | } 233 | if err := SaveProgressFile(dir, progressBufferMargin+12345); err != nil { 234 | t.Fatalf("save progress: %s", err) 235 | } 236 | offset, err = ReadProgressFile(dir) 237 | if err != nil { 238 | t.Fatalf("read progress: %s", err) 239 | } 240 | if offset != 12345 { 241 | t.Fatalf("expected progress offset %d but got %d", 12345, offset) 242 | } 243 | } 244 | 245 | func TestCombinePair(t *testing.T) { 246 | t1 := time.Now() 247 | t2 := t1.Add(time.Second) 248 | t3 := t2.Add(time.Second) 249 | t4 := t3.Add(time.Second) 250 | 251 | dp1 := otlptest.DoubleDataPoint( 252 | otlptest.Attributes( 253 | otlptest.Attribute("A", "B"), 254 | otlptest.Attribute("C", "D"), 255 | ), 256 | t1, t2, 10, 257 | ) 258 | dp2 := otlptest.DoubleDataPoint( 259 | otlptest.Attributes( 260 | otlptest.Attribute("A", "C"), 261 | otlptest.Attribute("B", "D"), 262 | ), 263 | t3, t4, 20, 264 | ) 265 | 266 | p1 := otlptest.Gauge("test", "", "", dp1) 267 | p2 := otlptest.Gauge("test", "", "", dp2) 268 | 269 | require.True(t, combine(p1, p2)) 270 | require.Equal(t, p1, otlptest.Gauge("test", "", "", dp1, dp2)) 271 | } 272 | 273 | func TestAppendSamples(t *testing.T) { 274 | const ( 275 | count = 100 276 | lsize 
= 1024 277 | overhead = 100 278 | ) 279 | hugeLabels := otlptest.Attributes(otlptest.Attribute("1kb", strings.Repeat("x", lsize))) 280 | recorder := &nopAppender{} 281 | 282 | startTime := time.Now() 283 | 284 | var original []float64 285 | var points []*metric_pb.Metric 286 | 287 | newPoint := func(i int) *metric_pb.Metric { 288 | return otlptest.Gauge("test", "", "", 289 | otlptest.DoubleDataPoint( 290 | hugeLabels, startTime, startTime.Add(time.Duration(i)*time.Second), float64(i))) 291 | } 292 | 293 | for i := 0; i < count; i++ { 294 | original = append(original, float64(i)) 295 | points = append(points, newPoint(i)) 296 | } 297 | 298 | appendSamples(recorder, points) 299 | 300 | pointsPerBatch := (batchLimit - overhead) / lsize 301 | 302 | // Expect reduction in metric count, ignore rounding. 303 | require.GreaterOrEqual(t, len(recorder.samples), int(float64(count)/float64(pointsPerBatch))) 304 | require.LessOrEqual(t, len(recorder.samples), 1+int(float64(count)/float64(pointsPerBatch))) 305 | 306 | var output []*metric_pb.Metric 307 | 308 | totalCount := 0 309 | for _, sm := range recorder.samples { 310 | output = append(output, sm.Metric()) 311 | totalCount += sm.Count() 312 | } 313 | 314 | require.Equal(t, count, totalCount) 315 | 316 | var received []float64 317 | 318 | vs := otlptest.VisitorState{} 319 | vs.Visit(context.Background(), 320 | func( 321 | resource *resource_pb.Resource, 322 | metricName string, 323 | kind config.Kind, 324 | monotonic bool, 325 | point interface{}, 326 | ) error { 327 | ddp := point.(*metric_pb.NumberDataPoint) 328 | number := ddp.Value 329 | value := number.(*metric_pb.NumberDataPoint_AsDouble).AsDouble 330 | received = append(received, value) 331 | require.Equal(t, uint64(startTime.UnixNano()), ddp.StartTimeUnixNano) 332 | require.Equal(t, uint64(startTime.Add(time.Second*time.Duration(int64(value))).UnixNano()), ddp.TimeUnixNano) 333 | return nil 334 | }, 335 | otlptest.ResourceMetrics( 336 | otlptest.Resource(), 337 | 
otlptest.InstrumentationLibraryMetrics( 338 | otlptest.InstrumentationLibrary("test", "v0"), 339 | output..., 340 | ), 341 | ), 342 | ) 343 | 344 | // Expect the same values, same order. 345 | require.Equal(t, original, received) 346 | } 347 | -------------------------------------------------------------------------------- /retrieval/testdata/bearertoken.txt: -------------------------------------------------------------------------------- 1 | 12345 2 | -------------------------------------------------------------------------------- /retrieval/testdata/ca.cer: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV 3 | BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg 4 | Q29tcGFueSBMdGQxGzAZBgNVBAMMElByb21ldGhldXMgVGVzdCBDQTAeFw0xNTA4 5 | MDQxNDA5MjFaFw0yNTA4MDExNDA5MjFaMF8xCzAJBgNVBAYTAlhYMRUwEwYDVQQH 6 | DAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQgQ29tcGFueSBMdGQxGzAZ 7 | BgNVBAMMElByb21ldGhldXMgVGVzdCBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEP 8 | ADCCAQoCggEBAOlSBU3yWpUELbhzizznR0hnAL7dbEHzfEtEc6N3PoSvMNcqrUVq 9 | t4kjBRWzqkZ5uJVkzBPERKEBoOI9pWcrqtMTBkMzHJY2Ep7GHTab10e9KC2IFQT6 10 | FKP/jCYixaIVx3azEfajRJooD8r79FGoagWUfHdHyCFWJb/iLt8z8+S91kelSRMS 11 | yB9M1ypWomzBz1UFXZp1oiNO5o7/dgXW4MgLUfC2obJ9j5xqpc6GkhWMW4ZFwEr/ 12 | VLjuzxG9B8tLfQuhnXKGn1W8+WzZVWCWMD/sLfZfmjKaWlwcXzL51g8E+IEIBJqV 13 | w51aMI6lDkcvAM7gLq1auLZMVXyKWSKw7XMCAwEAAaNQME4wHQYDVR0OBBYEFMz1 14 | BZnlqxJp2HiJSjHK8IsLrWYbMB8GA1UdIwQYMBaAFMz1BZnlqxJp2HiJSjHK8IsL 15 | rWYbMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAI2iA3w3TK5J15Pu 16 | e4fPFB4jxQqsbUwuyXbCCv/jKLeFNCD4BjM181WZEYjPMumeTBVzU3aF45LWQIG1 17 | 0DJcrCL4mjMz9qgAoGqA7aDDXiJGbukMgYYsn7vrnVmrZH8T3E8ySlltr7+W578k 18 | pJ5FxnbCroQwn0zLyVB3sFbS8E3vpBr3L8oy8PwPHhIScexcNVc3V6/m4vTZsXTH 19 | U+vUm1XhDgpDcFMTg2QQiJbfpOYUkwIgnRDAT7t282t2KQWtnlqc3zwPQ1F/6Cpx 20 | 
j19JeNsaF1DArkD7YlyKj/GhZLtHwFHG5cxznH0mLDJTW7bQvqqh2iQTeXmBk1lU 21 | mM5lH/s= 22 | -----END CERTIFICATE----- 23 | -------------------------------------------------------------------------------- /retrieval/testdata/ca.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEpgIBAAKCAQEA6VIFTfJalQQtuHOLPOdHSGcAvt1sQfN8S0Rzo3c+hK8w1yqt 3 | RWq3iSMFFbOqRnm4lWTME8REoQGg4j2lZyuq0xMGQzMcljYSnsYdNpvXR70oLYgV 4 | BPoUo/+MJiLFohXHdrMR9qNEmigPyvv0UahqBZR8d0fIIVYlv+Iu3zPz5L3WR6VJ 5 | ExLIH0zXKlaibMHPVQVdmnWiI07mjv92BdbgyAtR8Lahsn2PnGqlzoaSFYxbhkXA 6 | Sv9UuO7PEb0Hy0t9C6GdcoafVbz5bNlVYJYwP+wt9l+aMppaXBxfMvnWDwT4gQgE 7 | mpXDnVowjqUORy8AzuAurVq4tkxVfIpZIrDtcwIDAQABAoIBAQCcVDd3pYWpyLX1 8 | m31UnkX1rgYi3Gs3uTOznra4dSIvds6LrG2SUFGPEibLBql1NQNHHdVa/StakaPB 9 | UrqraOe5K0sL5Ygm4S4Ssf1K5JoW2Be+gipLPmBsDcJSnwO6eUs/LfZAQd6qR2Nl 10 | hvGJcQUwne/TYAYox/bdHWh4Zu/odz4NrZKZLbnXkdLLDEhZbjA0HpwJZ7NpMcB7 11 | Z6NayOm5dAZncfqBjY+3GNL0VjvDjwwYbESM8GkAbojMgcpODGk0h9arRWCP2RqT 12 | SVgmiFI2mVT7sW1XLdVXmyCL2jzak7sktpbLbVgngwOrBmLO/m4NBftzcZrgvxj3 13 | YakCPH/hAoGBAP1v85pIxqWr5dFdRlOW0MG35ifL+YXpavcs233jGDHYNZefrR5q 14 | Mw8eA20zwj41OdryqGh58nLYm3zYM0vPFrRJrzWYQfcWDmQELAylr9z9vsMj8gRq 15 | IZQD6wzFmLi1PN2QDmovF+2y/CLAq03XK6FQlNsVQxubfjh4hcX5+nXDAoGBAOut 16 | /pQaIBbIhaI8y3KjpizU24jxIkV8R/q1yE5V01YCl2OC5hEd4iZP14YLDRXLSHKT 17 | e/dyJ/OEyTIzUeDg0ZF3ao9ugbWuASgrnrrdPEooi7C9n9PeaLFTK5oVZoVP2A7E 18 | BwhSFW3VdEzQkdJczVE2jOY6JdBKMndjoDQnhT6RAoGBAL4WMO1gdnYeZ0JQJoZd 19 | kPgrOZpR2DaDa3I3F+3k3enM0+2EmzE70E4fYcyPTLqh62H4LS4ngRx4sK7D7j2G 20 | 9u2EcsDNEXUE+wgzROK7hxtGysTMeiKrg8Hj6nFq53Bqp1s7SESGS/lCDPD398Rr 21 | hdL5gJyN5waW6uXqJ9Pk+eFHAoGBAKV/YGcV1XTKSPT9ZgxRmM6ghq0qT1umA1Gt 22 | t0QzBp2+Yhqx/+cDKhynMnxhZEXqoyw6HvJLSny5wSMsYJHeratNxRmFizZOQ2e3 23 | AdbMppqY0EdDUWnRI4lqExM3de+let4bj6irI3smSm3qhIvJOTCPcu/04zrZ74hh 24 | AE2/dtTRAoGBAO6bENEqLgxZOvX5NnbytTuuoEnbceUPiIvc6S/nWJPEoGXVN2EJ 25 | 
a3OaIOQmknE6bjXIWrHTaXJhwejvPUz9DVa4GxU5aJhs4gpocVGf+owQFvk4nJO8 26 | JL+QVVdXp3XdrXIGyvXJfy0fXXgJg5czrnDHjSTE8/2POtyuZ6VyBtQc 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /retrieval/testdata/client.cer: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIERjCCAy6gAwIBAgIBZDANBgkqhkiG9w0BAQUFADBfMQswCQYDVQQGEwJYWDEV 3 | MBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBhbnkg 4 | THRkMRswGQYDVQQDDBJQcm9tZXRoZXVzIFRlc3QgQ0EwHhcNMTUwODA0MTQ0MTE2 5 | WhcNNDIxMjIwMTQ0MTE2WjBVMQswCQYDVQQGEwJYWDEVMBMGA1UEBwwMRGVmYXVs 6 | dCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBhbnkgTHRkMREwDwYDVQQDDAh0 7 | ZXN0dXNlcjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAOKBBXx35X9+ 8 | BLGqY/cC2+lQYZzn13Z8ZEDrUKpv5n91QA0B/YZE3gDSnk2yry8dxmp1NJtXm8Wr 9 | rIQSBnsTGOKwyIwR1gcggUYPD9fCyy7T7y7YbzBG8drEcxiK/YIWyio0fpRCfT9b 10 | 2+fOEeY+0+tgFV++XjbXVzXRCBMmsZ22cOm4t2t7GHKBZhYoUoPgKjDn+4t/rr0r 11 | 1od6yVOocYCo6RruQHsWPHj6QlU8VGutkD7PpvLS+w2l/6JqmZDHlY6o6pDidC8a 12 | kp8i/t3pNBlexk6st/8YZ5S9j6LjqC6bUnerUZB40b6L8OXXwWS3S5y6t07A1QIn 13 | Pv2DZKGbn8Uuj7RvS5OAZdDn1P+M5aVlRLoYbdTHJILrLg+bxyDIokqONbLgj78A 14 | FT6a013eJAZJBkeoaN7Djbf/d5FjRDadH2bX0Uur3APh4cbv+0Fo13CPPSckA9EU 15 | o42qBmKLWys858D8vRKyS/mq/IeRL0AIwKuaEIJtPtiwCTnk6PvFfQvO80z/Eyq+ 16 | uvRBoZbrWHb+3GR8rNzu8Gc1UbTC+jnGYtbQhxx1/7nae52XGRpplnwPO9cb+px2 17 | Zf802h+lP3SMY/XS+nyTAp/jcy/jOAwrZKY4rgz+5ZmKCI61NZ0iovaK7Jqo9qTM 18 | iSjykZCamFhm4pg8itECD5FhnUetJ6axAgMBAAGjFzAVMBMGA1UdJQQMMAoGCCsG 19 | AQUFBwMCMA0GCSqGSIb3DQEBBQUAA4IBAQDEQyFfY9WAkdnzb+vIlICgfkydceYx 20 | KVJZ2WRMvrn2ZoRoSaK3CfGlz4nrCOgDjQxfX8OpKzudr/ghuBQCbDHHzxRrOen5 21 | 0Zig9Q+pxTZNrtds/SwX2dHJ7PVEwGxXXaKl8S19bNEdO0syFrRJU6I50ZbeEkJe 22 | RI9IEFvBHcuG/GnEfqWj2ozI/+VhIOb4cTItg67ClmIPe8lteT2wj+/aydF9PKgF 23 | QhooCe/G1nok1uiaGjo1HzFEn4HzI3s4mrolc8PpBBVsS+HckCOrHpRPWnYuCFEm 24 | 0yzS6tGaMrnITywwB2/uJ2aBAZIx2Go1zFhPf0YvFJc3e2x8cAuqBRLu 25 | 
-----END CERTIFICATE----- 26 | -------------------------------------------------------------------------------- /retrieval/testdata/client.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIJKAIBAAKCAgEA4oEFfHflf34Esapj9wLb6VBhnOfXdnxkQOtQqm/mf3VADQH9 3 | hkTeANKeTbKvLx3GanU0m1ebxaushBIGexMY4rDIjBHWByCBRg8P18LLLtPvLthv 4 | MEbx2sRzGIr9ghbKKjR+lEJ9P1vb584R5j7T62AVX75eNtdXNdEIEyaxnbZw6bi3 5 | a3sYcoFmFihSg+AqMOf7i3+uvSvWh3rJU6hxgKjpGu5AexY8ePpCVTxUa62QPs+m 6 | 8tL7DaX/omqZkMeVjqjqkOJ0LxqSnyL+3ek0GV7GTqy3/xhnlL2PouOoLptSd6tR 7 | kHjRvovw5dfBZLdLnLq3TsDVAic+/YNkoZufxS6PtG9Lk4Bl0OfU/4zlpWVEuhht 8 | 1MckgusuD5vHIMiiSo41suCPvwAVPprTXd4kBkkGR6ho3sONt/93kWNENp0fZtfR 9 | S6vcA+Hhxu/7QWjXcI89JyQD0RSjjaoGYotbKzznwPy9ErJL+ar8h5EvQAjAq5oQ 10 | gm0+2LAJOeTo+8V9C87zTP8TKr669EGhlutYdv7cZHys3O7wZzVRtML6OcZi1tCH 11 | HHX/udp7nZcZGmmWfA871xv6nHZl/zTaH6U/dIxj9dL6fJMCn+NzL+M4DCtkpjiu 12 | DP7lmYoIjrU1nSKi9orsmqj2pMyJKPKRkJqYWGbimDyK0QIPkWGdR60nprECAwEA 13 | AQKCAgEA18az1ERf9Fm33Q0GmE039IdnxlMy9qQ/2XyS5xsdCXVIZFvuClhW6Y+7 14 | 0ScVLpx95fLr/8SxF9mYymRlmh+ySFrDYnSnYTi9DmHQ5OmkKGMr64OyQNqFErSt 15 | NMdMA/7z7sr9fv3sVUyMLMMqWB6oQgXRttki5bm1UgZlW+EzuZwQ6wbWbWTiAEt3 16 | VkppeUo2x0poXxdu/rXhdEUrwC+qmTfQgaBQ+zFOwK0gPhTwE3hP/xZQ4+jL08+8 17 | vRwyWTNZLYOLmiSxLCJzZXiwNfUwda7M2iw+SJ0WKCOBz1pzYJsFMA2b8Ta4EX89 18 | Kailiu328UMK19Jp2dhLcLUYS8B2rVVAK5b/O6iKV8UpKTriXDiCKSpcugpsQ1ML 19 | zq/6vR0SQXD+/W0MesGaNa33votBXJSsf9kZnYJw43n+W4Z/XFUE5pyNM/+TGAqw 20 | yuF4FX2sJL1uP5VMOh2HdthTr+/ewx/Trn9/re0p54z83plVlp4qbcORLiQ2uDf6 21 | ZZ0/gHzNTp4Fzz81ZvHLm9smpe8cLvojrKLvCl0hv5zAf3QtsajpTN9uM7AsshV1 22 | QVZSuAxb5n9bcij5F2za1/dd7WLlvsSzgNJ4Td/gEDI8qepB0+7PGlJ17sMg0nWP 23 | nFxUfGIsCF1KOoPwLyaNHHrRGjJigFUufqkbmSWkOzgC6pZVUXECggEBAP81To16 24 | O5BlGDahcQkjKkqUwUtkhjE9/KQBh3zHqxsitI8f0U7eL3Ge1qhbgEgvHwHOjWSV 25 | pcG9atE55b7qlqqGQboiO1jfyLfIVLfamj0fHLinO/pV/wcBNy6Hz4rP7DNJDCMz 26 | 
0agz/Ys3VXrZIk5sO0sUBYMBxho1x0n65Z06iK1SwD/x4Xg3/Psyx+ujEEkSsv5I 27 | Gg7aOTHLRSIPUx/OK+4M3sp58PeMGfEYNYxNiEoMiUQgu/srKRjs+pUKXCkEraNW 28 | 8s/ODYJ7iso6Z1z4NxfBH+hh+UrxTffh7t0Sz5gdUwUnBNb2I4EdeCcCTOnWYkut 29 | /GKW8oHD7f9VDS0CggEBAOM06rrp9rSsl6UhTu8LS5bjBeyUxab4HLZKP5YBitQO 30 | ltcPS05MxQ3UQ1BAMDRjXE2nrKlWMOAybrffEXBi4U1jYt7CiuCwwsPyaYNWT5qO 31 | Iwdjebkeq3+Mh8c48swhOwRLWSGg6mtRoR/c5cthYU62+s2zdxc/yhVTQ0WNFabT 32 | 23PYtjjW41WuR6K7Dhrdcw0MwIs1arZHTsDdU6Hln9raTSNwlHMBWVz/tzuwLieQ 33 | WEUXvsQvPtgPyohmDd0ueXiuS2FiYaXKFIMFj5/JyyJc1OCr1vIQN8mMcUjNbk2I 34 | VaeeSPawgKIiYARhbjJtjwjY6D59gOZrNGYASQOTGhUCggEAJPOB8SgekbShgd90 35 | L1+BExVgu1rNtzmDZ/e0t1Ntqdsni4WO172B3xChgfTlqQ3xjmBqxoKIYnnbinm4 36 | kyECOaSAxcOJFkAonruJ0Kj9JhZoITBNldx3tXruk3UkjrO2PmK4OCybkaAdeNfF 37 | L6lat0Iif6dheOt71HWu6j5CmrZL7dSKc3fBLpfksDZVDgApLntfoUOtSjM8jsIg 38 | u2K+pV9Dqw7//w8S3bTSWL8pmavsLNSN12hp7177b1l4mrXKTEIaJglD1OS/vgHH 39 | QaqdJq/lwjG7PflZkAlKQbbbz/SWTC8Kwzc4EyvGTj6HFBbYLg9VYiHJ5jh22mUV 40 | A6A77QKCAQAM6DWpdp8QNnnK5LCCPecGZFEy1mTADno7FM6169KCJ24EO5cwlIXh 41 | Ojy0s2DJqRdWRf82A3J1WggWI/Luqn9YERxNwUl4aDI4RW4fCuksw4RT6B/DF23w 42 | qgAQnjiUxhJ/NPSUR3rpq9J2Z+sZ+ac4fIaU5uwOAw6s1XUN32zqdECUPSxk4Dg7 43 | 5tGk+fFcL1ZY2G+buOYeAsEDjc8xdET3fs1BBSU5v0rfUJuNJX4Ju1Z4Xlf09yYf 44 | yg3cX8fL19cItwYLOzaG34r4wnkdP65tfk6NkNV+HNO+fF73Hsx0VRlgk0pb0T0N 45 | eNxxg0NqU/T7MK9I1YJcFJz+ame7b0DdAoIBAFw3Sf9LbVVNh8ef4OqjBZR8RCYq 46 | 4HeG0FPYvMLzUtFi7j4uBfiL4+pNpSFvecSuLRKE8Pr5dPRJNPNgJud5gvuykBZX 47 | Q9ktQJTAPZK8Q5neLeXfAdoF3szJuEZbDdGSps4JFokVIX+h3c+uFRD9QMSh+bz1 48 | nEXCdYvmTs+bsTL+l7cbXq2iIKk1QnEcL+cRYr3VjP5xxZ/hGnuYqe9wmyo2MVkS 49 | NVUmCifIvE34TO072HH49gVPrhj9qIZsfBh4LBpl75eKwXTXx+HFqHhP8OfzuK6U 50 | v/JQn9JUGGzkmoMazQ9o5D5h/o0t/OGOPnQeqWL4BIPXdHv/dua6jLnAoU8= 51 | -----END RSA PRIVATE KEY----- 52 | -------------------------------------------------------------------------------- /retrieval/testdata/server.cer: -------------------------------------------------------------------------------- 1 | -----BEGIN 
CERTIFICATE----- 2 | MIIDSzCCAjOgAwIBAgIJAPn0lI/95RQVMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV 3 | BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg 4 | Q29tcGFueSBMdGQxGzAZBgNVBAMMElByb21ldGhldXMgVGVzdCBDQTAeFw0xNTA4 5 | MDQxNDE5MjRaFw00MjEyMjAxNDE5MjRaMFYxCzAJBgNVBAYTAlhYMRUwEwYDVQQH 6 | DAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQgQ29tcGFueSBMdGQxEjAQ 7 | BgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB 8 | AMQhH0walZlA+Gy5ZB3YzzxZta7mhTX3P+yBeQ6G6yrei4H7gv+MTCJj5qUBc+BS 9 | cta8loKKUQWjoppjyh4tz8awkTD5sEyedE7/G3DS7mLgmx0PslwqrkXFBQhm/C2f 10 | aZfSO69TZ8uu1dgCmmGe9K2XqPnR6fu9egtLpK8RT0s/Cx04bFnaPS0ecyj+3q7A 11 | xzDsH84Z1KPo4LHgqNWlHqFsQPqH+7W9ajhF6lnO4ArEDJ3KuLDlgrENzCsDabls 12 | 0U2XsccBJzP+Ls+iQwMfKpx2ISQDHqniopSICw+sPufiAv+OGnnG6rGGWQjUstqf 13 | w4DnU4DZvkrcEWoGa6fq26kCAwEAAaMTMBEwDwYDVR0RBAgwBocEfwAAATANBgkq 14 | hkiG9w0BAQUFAAOCAQEAVPs8IZffawWuRqbXJSvFz7a1q95febWQFjvvMe8ZJeCZ 15 | y1k9laQ5ZLHYuQ6NUWn09UbQNtK3fCLF4sJx5PCPCp1vZWx4nJs8N5mNyqdQ1Zfk 16 | oyoYTOR2izNcIj6ZUFRoOR/7B9hl2JouCXrbExr96oO13xIfsdslScINz1X68oyW 17 | KjU0yUrY+lWG1zEkUGXti9K6ujtXa7YY2n3nK/CvIqny5nVToYUgEMpjUR9S+KgN 18 | JUtawY3VQKyp6ZXlHqa0ihsuvY9Hrlh14h0AsZchPAHUtDFv2nEQob/Kf1XynKw6 19 | itVKcj/UFpkhsnc/19aP1gWje76fejXl0tzyPXDXFg== 20 | -----END CERTIFICATE----- 21 | -------------------------------------------------------------------------------- /retrieval/testdata/server.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEpAIBAAKCAQEAxCEfTBqVmUD4bLlkHdjPPFm1ruaFNfc/7IF5DobrKt6LgfuC 3 | /4xMImPmpQFz4FJy1ryWgopRBaOimmPKHi3PxrCRMPmwTJ50Tv8bcNLuYuCbHQ+y 4 | XCquRcUFCGb8LZ9pl9I7r1Nny67V2AKaYZ70rZeo+dHp+716C0ukrxFPSz8LHThs 5 | Wdo9LR5zKP7ersDHMOwfzhnUo+jgseCo1aUeoWxA+of7tb1qOEXqWc7gCsQMncq4 6 | sOWCsQ3MKwNpuWzRTZexxwEnM/4uz6JDAx8qnHYhJAMeqeKilIgLD6w+5+IC/44a 7 | ecbqsYZZCNSy2p/DgOdTgNm+StwRagZrp+rbqQIDAQABAoIBACeOjqNo0TdhtTko 8 | gxrJ+bIwXcZy0/c4cPogeuwFJjU1QWnr8lXcVBazk3dAPcDGoEbTLoARqZm7kTYW 9 
| XlOL5dYrEn2QPpCVfNvZ9AzjXhUvO9m2qsCQEyobPJKfQslo14E5c7Q+3DZmgtbY 10 | X47E4pCIgBoyzkBpzM2uaf6tPRLtv8QcLklcf7lP5rd0Zypc325RR6+J5nxfCoFp 11 | fD3sj7t/lJLS8Xb6m4/YFjsVJ2qEAelZ086v8unMBEj324Vv/VqrkPFtFNJKI+Az 12 | Pd9xFDBdsKijBn1Yam9/dj7CiyZYKaVZ9p/w7Oqkpbrt8J8S8OtNHZ4fz9FJgRu9 13 | uu+VTikCgYEA5ZkDmozDseA/c9JTUGAiPfAt5OrnqlKQNzp2m19GKh+Mlwg4k6O5 14 | uE+0vaQEfc0cX3o8qntWNsb63XC9h6oHewrdyVFMZNS4nzzmKEvGWt9ON6qfQDUs 15 | 1cgZ0Y/uKydDX/3hk/hnJbeRW429rk0/GTuSHHilBzhE0uXJ11xPG48CgYEA2q7a 16 | yqTdqPmZFIAYT9ny099PhnGYE6cJljTUMX9Xhk4POqcigcq9kvNNsly2O1t0Eq0H 17 | 2tYo91xTCZc3Cb0N+Vx3meLIljnzhEtwzU9w6W5VGJHWiqovjGwtCdm/W28OlMzY 18 | zM+0gVCJzZLhL0vOwBLwGUJvjgfpvgIb/W+C2UcCgYB5TJ3ayQOath7P0g6yKBfv 19 | ITUd+/zovzXx97Ex5OPs3T4pjO5XEejMt0+F4WF+FR8oUiw65W5nAjkHRMjdI7dQ 20 | Ci2ibpEttDTV7Bass1vYJqHsRvhbs7w8NbtuO9xYcCXoUPkcc+AKzTC+beQIckcj 21 | zZUj9Zk6dz/lLAG3Bc3FgQKBgQC+MmZI6auAU9Y4ZlC+4qi4bfkUzaefMCC+a6RC 22 | iKbvQOUt9j+k81h+fu6MuuYkKh6CP8wdITbwLXRrWwGbjrqgrzO2u/AJ+M07uwGZ 23 | EAb8f+GzROR8JhjE4TEq6B/uvmDIOoI1YFF2Rz4TdjQ0lpJzrAT3czjjJy68+8is 24 | XFhJ8QKBgQCMPpB7taMLQzuilEGabL6Xas9UxryiGoBHk4Umb107GVWgwXxWT6fk 25 | YSlvbMQHCgVeaJe374Bghyw33Z3WilWM1fCWya/CxXlw9wakjQHiqFCIOCxdgosX 26 | Sr35bRFWJMnHXD+jD0Vr8WrtbGzFSZb3ZrjT6WhWRIGCHcaMANN9ew== 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /retrieval/testdata/servername.cer: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDOzCCAiMCCQDU4khDjkOJSTANBgkqhkiG9w0BAQsFADBfMQswCQYDVQQGEwJY 3 | WDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBh 4 | bnkgTHRkMRswGQYDVQQDDBJQcm9tZXRoZXVzIFRlc3QgQ0EwHhcNMTYwNTI2MjEx 5 | MjU5WhcNNDMxMDEyMjExMjU5WjBgMQswCQYDVQQGEwJBVTETMBEGA1UECBMKU29t 6 | ZS1TdGF0ZTEhMB8GA1UEChMYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRkwFwYD 7 | VQQDExBwcm9tZXRoZXVzLnJvY2tzMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB 8 | 
CgKCAQEAylgjuph/zgi1A2KS0Iw/73zdom449Gw+DATApL1sKYUTAVBk0uDpEZIw 9 | fuYEAz6RbzBgzdYw10cmDCDDb0lNTBF4V08bGAXcYeJkKnIHRZprTPs7PWAai1jE 10 | 0H6ph+ThuHghPku7OAeyTvYyt5i0jkU2vgLSPa9wLciCfvwtd6S1gsthfEl8YsKH 11 | iEVE+5h4nLjzp8MIgGBNPhzQvwW8x6bp0whuVzOFRHR1VBeK5rxG0LbCVU3Q5oPV 12 | SLuRTkjQ6vNtm/qZPTw2mALjpRUrNxbA453aE33foJHb3gF85bSt67F7glFww5sq 13 | GtxTiju8t8gNy7UV0ROlkoC7o1pMswIDAQABMA0GCSqGSIb3DQEBCwUAA4IBAQCr 14 | Fp+7FSOrgZO9BdBwmsnpNiymaOdf4ydOAXt5fdqkmgAyoRY5hPhFGduAfgKxESTf 15 | tf8dKPV82j0EQR8EOu4qqDhXaKeZ69ZWMEkmpafO0MMixZ2/CeTV+z9DydLOZ2cC 16 | IFJihSiLNGh8E4AUFdujbWBcTdv4FafRAiEhQ98iMyYiKXC/wcFLkL/u5Lvhr8yw 17 | LGuaKwheDy41Q9Vdb2xlPbgDdibMlvOGxP1AWbE+/0fmmncwr7oeF6b4+mpMEDJS 18 | XCoX6MSBdDmo9Gw1yH6l4KrvAI+StLWWxK2qs8lkWzZjiNS+JPWDeNqJBRmG6Yxc 19 | Fl2KpVLCjhcNehUvg23x 20 | -----END CERTIFICATE----- 21 | -------------------------------------------------------------------------------- /retrieval/testdata/servername.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEogIBAAKCAQEAylgjuph/zgi1A2KS0Iw/73zdom449Gw+DATApL1sKYUTAVBk 3 | 0uDpEZIwfuYEAz6RbzBgzdYw10cmDCDDb0lNTBF4V08bGAXcYeJkKnIHRZprTPs7 4 | PWAai1jE0H6ph+ThuHghPku7OAeyTvYyt5i0jkU2vgLSPa9wLciCfvwtd6S1gsth 5 | fEl8YsKHiEVE+5h4nLjzp8MIgGBNPhzQvwW8x6bp0whuVzOFRHR1VBeK5rxG0LbC 6 | VU3Q5oPVSLuRTkjQ6vNtm/qZPTw2mALjpRUrNxbA453aE33foJHb3gF85bSt67F7 7 | glFww5sqGtxTiju8t8gNy7UV0ROlkoC7o1pMswIDAQABAoIBADZ5vETEQcRKe9FJ 8 | fJVA7QWg7FqKqjLD4YCC1wqDJNeYyCEWb86GVrkwTnYbnwDwm17/+0/vVn7e3NNv 9 | Dq6rYXAVU/zNg1HYYhjIRodW47ZNeI3lJXHEqeDSKUqojyPS7yIm1WxcHy9agxrX 10 | FZhwOEwFPlOxlsCcturcjKV7ZxJKftiWoyPodQLjlEmNoD/MQ6Obuge1dQZRLwCk 11 | /R+EcTWHN4A+rpnZLoKFEaw5p7DTjdKSGOu+EFB+lrEg5kTOCN/kR0PYGnDH1Ygd 12 | 6/DmP0xiPpT2pKudTtI7f+QoPtff+GJ47Xy1oYks/cXUJiJbtCT9wyKQtR5mZRUc 13 | ruNWBCECgYEA9e87HbUaMA4tAqaur684RTFAqpDjDBB8tDAxbnuQrv6947odgQHu 14 | YcBAneL2HIvUMuusI0X52nGRwt+qOSXiS1WQwA1P44qR28VYxLIkgK1xMEpezClU 15 | 
xIavMzwZtmjCZ84Q6H/qvVuqa5MuE4pe6O9vnb4cUWF280ngmf+zViUCgYEA0qAx 16 | qzh6cUBSF6PAV+7QKXB4YLfvLloX3qwC+qkdaGjacREb7URxTKs1lHLhpmHwoPN+ 17 | aXccxNs443Z67AK68N2RAOVw3z1IPTmSUzL7HCKqzZtRXsj+Lm8bj9sRzvWuE7RU 18 | X2QW+9ppAvjwwrhG0vXCs3yua2usMyHjr6ekw/cCgYBSut0qCyf6Dmq5v5R36PuG 19 | 2yCjwAWAo3Mvsh6OyeZL18nM92jBYwLrwx55fkXIKImDb6ACZaG9CAM+iLrcapAL 20 | Q4dj85ZyNsUGJwbLdBmvZ6jx07K7/xNS4PPCym7j2625+anabF1swY88jNAtJpjy 21 | xsjHSZKBFcZL5Qg3BbswOQKBgHigD/IMRWtot9scCAMUHRkudXKGxK9aH4OCJa6i 22 | fdoW+st4TfMjmHOdNfFPndWpD6NN8B68fbhsCHeUmi9iHOfnLK1DudHQCfguaZPG 23 | hbOGUyWvhvluyMuVDEbl4pwRbeGRDCUZcGRKoIt4QIJ0APO+lgQvKsEQiC08gmZN 24 | 73nfAoGAKXVVV7dN59gohMTRWsOSGP+YLEj8+rGZZYNKCLVTol0VQ7T30tA0P4Cf 25 | Dw9oLKGnDdgTtJA6Fsms858B6ANC+6Hxd9LG0ecOevKMBFHuWPm56Z0ofDzoPVBW 26 | eDuHeR5xF0xq5PIFl/mIJJ1NK0p1Do9gwqEEIftdNyrcGefGdXk= 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /snappy/snappy.go: -------------------------------------------------------------------------------- 1 | package snappy 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "io/ioutil" 7 | "sync" 8 | 9 | "github.com/golang/snappy" 10 | "google.golang.org/grpc/encoding" 11 | ) 12 | 13 | // Name is the name registered for the snappy compressor. 
14 | const Name = "snappy" 15 | 16 | type compressor struct { 17 | writerPool sync.Pool 18 | readerPool sync.Pool 19 | } 20 | 21 | func init() { 22 | c := &compressor{} 23 | c.writerPool.New = func() interface{} { 24 | return &writer{Writer: snappy.NewBufferedWriter(ioutil.Discard), pool: &c.writerPool} 25 | } 26 | c.readerPool.New = func() interface{} { 27 | return &reader{Reader: snappy.NewReader(bytes.NewReader(nil)), pool: &c.readerPool} 28 | } 29 | encoding.RegisterCompressor(c) 30 | } 31 | 32 | type writer struct { 33 | *snappy.Writer 34 | pool *sync.Pool 35 | } 36 | 37 | func (c *compressor) Compress(w io.Writer) (io.WriteCloser, error) { 38 | z := c.writerPool.Get().(*writer) 39 | z.Reset(w) 40 | return z, nil 41 | } 42 | 43 | func (w *writer) Close() error { 44 | defer w.pool.Put(w) 45 | return w.Writer.Close() 46 | } 47 | 48 | type reader struct { 49 | *snappy.Reader 50 | pool *sync.Pool 51 | } 52 | 53 | func (c *compressor) Decompress(r io.Reader) (io.Reader, error) { 54 | z := c.readerPool.Get().(*reader) 55 | z.Reader.Reset(r) 56 | return z, nil 57 | } 58 | 59 | func (r *reader) Read(p []byte) (n int, err error) { 60 | n, err = r.Reader.Read(p) 61 | if err == io.EOF { 62 | r.pool.Put(r) 63 | } 64 | return n, err 65 | } 66 | 67 | func (c *compressor) Name() string { 68 | return Name 69 | } 70 | -------------------------------------------------------------------------------- /supervisor/supervisor_test.go: -------------------------------------------------------------------------------- 1 | // Copyright The OpenTelemetry Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package supervisor 16 | 17 | import ( 18 | "runtime" 19 | "testing" 20 | 21 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 22 | "github.com/stretchr/testify/require" 23 | ) 24 | 25 | func TestStackRegexp(t *testing.T) { 26 | buffer := make([]byte, 1<<14) 27 | stacksz := runtime.Stack(buffer, true) 28 | astack := buffer[:stacksz] 29 | 30 | s := New(Config{ 31 | Logger: telemetry.StaticLogger(), 32 | }) 33 | 34 | require.True(t, s.isStackdump(astack), "for stack %q", string(astack)) 35 | require.False(t, s.isStackdump([]byte(`some 36 | other 37 | long 38 | text 39 | `))) 40 | } 41 | -------------------------------------------------------------------------------- /tail/testdata/corruption/00000000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightstep/opentelemetry-prometheus-sidecar/a9be6a6ed05478cac75a89b6a463a029ae7900c1/tail/testdata/corruption/00000000 -------------------------------------------------------------------------------- /tail/testdata/invalid-segment/00000000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightstep/opentelemetry-prometheus-sidecar/a9be6a6ed05478cac75a89b6a463a029ae7900c1/tail/testdata/invalid-segment/00000000 -------------------------------------------------------------------------------- /tail/testdata/invalid-segment/00000001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lightstep/opentelemetry-prometheus-sidecar/a9be6a6ed05478cac75a89b6a463a029ae7900c1/tail/testdata/invalid-segment/00000001 -------------------------------------------------------------------------------- /telemetry/README.md: -------------------------------------------------------------------------------- 1 | This package was copied from Lightstep's [OpenTelemetry Go 2 | Launcher](https://github.com/lightstep/otel-launcher-go). It was 3 | modified as follows: 4 | 5 | - Use of `go-kit/log` logging API for consistency with this code base 6 | - Remove the use of environment variables, as this code base prefers configuration files 7 | - Remove Lightstep-specific functionality 8 | - Standard log package and gRPC logging integration. 9 | -------------------------------------------------------------------------------- /telemetry/cmd/sidecar-telemetry-test/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net" 8 | "net/url" 9 | "os" 10 | "strings" 11 | "time" 12 | 13 | "github.com/go-kit/kit/log/level" 14 | "github.com/lightstep/opentelemetry-prometheus-sidecar/telemetry" 15 | "go.opentelemetry.io/otel" 16 | ) 17 | 18 | var logger = telemetry.DefaultLogger() 19 | 20 | func main() { 21 | if err := Main(); err != nil { 22 | level.Error(logger).Log("msg", err) 23 | os.Exit(1) 24 | } 25 | } 26 | 27 | func Main() error { 28 | telemetry.StaticSetup(logger) 29 | telemetry.SetVerboseLevel(99) 30 | 31 | if len(os.Args) < 2 { 32 | return fmt.Errorf("usage: %s https://ingest.data.com:443 Header=Value ...", os.Args[0]) 33 | } 34 | 35 | endpointURL, err := url.Parse(os.Args[1]) 36 | if err != nil { 37 | return err 38 | } 39 | 40 | headers := map[string]string{} 41 | for _, hdr := range os.Args[2:] { 42 | kv := strings.SplitN(hdr, "=", 2) 43 | headers[kv[0]] = kv[1] 44 | } 45 | 46 | address := endpointURL.Hostname() 47 | if 
len(endpointURL.Port()) > 0 { 48 | address = net.JoinHostPort(address, endpointURL.Port()) 49 | } 50 | 51 | insecure := false 52 | switch endpointURL.Scheme { 53 | case "http": 54 | insecure = true 55 | case "https": 56 | default: 57 | return fmt.Errorf("invalid endpoint, use https:// or http://") 58 | } 59 | 60 | defer telemetry.ConfigureOpentelemetry( 61 | telemetry.WithLogger(logger), 62 | telemetry.WithSpanExporterEndpoint(address), 63 | telemetry.WithSpanExporterInsecure(insecure), 64 | telemetry.WithMetricsExporterEndpoint(address), 65 | telemetry.WithMetricsExporterInsecure(insecure), 66 | telemetry.WithHeaders(headers), 67 | telemetry.WithResourceAttributes(map[string]string{ 68 | "service.name": "sidecar-telemetry-test", 69 | }), 70 | ).Shutdown(context.Background()) 71 | 72 | otel.Handle(fmt.Errorf("printing OTel error")) 73 | 74 | log.Print("printing STDLOG error") 75 | 76 | level.Info(logger).Log("msg", "sending OTLP", "endpoint", endpointURL) 77 | 78 | tracer := otel.Tracer("sidecar-telemetry-test") 79 | for { 80 | _, span := tracer.Start(context.Background(), "ping") 81 | time.Sleep(time.Second) 82 | span.End() 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /telemetry/doevery/doevery.go: -------------------------------------------------------------------------------- 1 | // Copyright Lightstep Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
var (
	// mu guards mostRecentInvocationMap.
	mu sync.Mutex

	// mostRecentInvocationMap records, for every call site of TimePeriod,
	// the time at which its function was last allowed to run.
	// TODO: consider sharding this e.g. [8]map[invocationKey]time.Time
	// if it bottlenecks.
	mostRecentInvocationMap = make(map[invocationKey]time.Time)
)

// invocationKey names one line of source code: the file it was compiled
// from (e.g. go/src/pkg/main.go) and the line number within that file.
type invocationKey struct {
	file string
	line int
}

// TimePeriod runs f at most once per dur for the call site it is invoked
// from. It is meant for things like log statements on hot paths, where one
// message every few seconds is enough.
//
// Call sites are told apart by file and line, and each one is limited
// independently; a given call site should always pass the same dur. The
// limit is shared by all goroutines that reach the same line.
//
// Example usage:
//   end := time.Now().Add(5 * time.Second)
//   for end.After(time.Now()) {
//     doevery.TimePeriod(1*time.Second, func() {
//       fmt.Println("This will only appear once per second.")
//     })
//   }
//
// A negative dur panics. TimePeriod is safe for concurrent use.
func TimePeriod(dur time.Duration, f func()) {
	if dur < 0 {
		panic(fmt.Sprintf("negative duration unsupported: %v", dur))
	}

	// Skip 0 would be TimePeriod itself; skip 1 is whoever called it.
	_, file, line, ok := runtime.Caller(1)
	if !ok {
		// The caller cannot be identified, so it cannot be rate-limited.
		// Fail open and run f.
		f()
		return
	}

	// File/line is used rather than the program counter because one line of
	// code can have several PCs once it has been inlined in multiple places.
	if claimInvocation(invocationKey{file: file, line: line}, dur) {
		f()
	}
}

// claimInvocation reports whether the call site key may run now, i.e. it
// has never run or its last run is more than dur ago. When it may, the
// current time is recorded before returning so that concurrent callers are
// turned away.
func claimInvocation(key invocationKey, dur time.Duration) bool {
	mu.Lock()
	defer mu.Unlock()

	if last, seen := mostRecentInvocationMap[key]; seen && time.Since(last) <= dur {
		return false
	}
	mostRecentInvocationMap[key] = time.Now()
	return true
}
24 | // Without TimePeriod, we anticipate 20 invocations. 25 | // With TimePeriod anticipate 2 invocations. 26 | require.Less(t, invocations, 5) 27 | } 28 | 29 | func TestZero(t *testing.T) { 30 | end := time.Now().Add(3 * time.Second) 31 | var invocations int 32 | for time.Now().Before(end) { 33 | if invocations > 0 { 34 | break 35 | } 36 | TimePeriod(0, func() { 37 | t.Logf("running at %v", time.Now()) 38 | invocations++ 39 | }) 40 | time.Sleep(100 * time.Millisecond) 41 | } 42 | // Basically just check that it ever executes. 43 | require.Greater(t, invocations, 0) 44 | } 45 | 46 | func TestConcurrentSamePC(t *testing.T) { 47 | var wg sync.WaitGroup 48 | var invocations int64 49 | 50 | end := time.Now().Add(2 * time.Second) 51 | 52 | for i := 0; i < 5; i++ { 53 | wg.Add(1) 54 | go func() { 55 | defer wg.Done() 56 | 57 | for time.Now().Before(end) { 58 | TimePeriod(1*time.Second, func() { 59 | t.Logf("running at %v", time.Now()) 60 | atomic.AddInt64(&invocations, 1) 61 | }) 62 | time.Sleep(100 * time.Millisecond) 63 | } 64 | }() 65 | } 66 | wg.Wait() 67 | // It should be less, but with a little slop. 68 | // Without TimePeriod, we anticipate 20 invocations. 69 | // With TimePeriod anticipate 2 invocations. 
70 | require.Less(t, invocations, int64(5)) 71 | } 72 | 73 | func TestConcurrentDifferentPC(t *testing.T) { 74 | var wg sync.WaitGroup 75 | var invocations int64 76 | 77 | wg.Add(1) 78 | go func() { 79 | defer wg.Done() 80 | 81 | end := time.Now().Add(2 * time.Second) 82 | for time.Now().Before(end) { 83 | TimePeriod(1*time.Second, func() { 84 | t.Logf("running (0) at %v", time.Now()) 85 | atomic.AddInt64(&invocations, 1) 86 | }) 87 | time.Sleep(100 * time.Millisecond) 88 | } 89 | }() 90 | 91 | wg.Add(1) 92 | go func() { 93 | defer wg.Done() 94 | 95 | end := time.Now().Add(2 * time.Second) 96 | for time.Now().Before(end) { 97 | TimePeriod(1*time.Second, func() { 98 | t.Logf("running (1) at %v", time.Now()) 99 | atomic.AddInt64(&invocations, 1) 100 | }) 101 | time.Sleep(100 * time.Millisecond) 102 | } 103 | }() 104 | 105 | wg.Add(1) 106 | go func() { 107 | defer wg.Done() 108 | 109 | end := time.Now().Add(2 * time.Second) 110 | for time.Now().Before(end) { 111 | TimePeriod(1*time.Second, func() { 112 | t.Logf("running (2) at %v", time.Now()) 113 | atomic.AddInt64(&invocations, 1) 114 | }) 115 | time.Sleep(100 * time.Millisecond) 116 | } 117 | }() 118 | 119 | wg.Wait() 120 | // We expect exactly 6. Allow a little slop. 121 | require.GreaterOrEqual(t, invocations, int64(4)) 122 | } 123 | 124 | func BenchmarkDoEvery(b *testing.B) { 125 | invocations := 0 126 | for i := 0; i < b.N; i++ { 127 | TimePeriod(1, func() { 128 | invocations++ 129 | }) 130 | } 131 | if invocations != b.N { 132 | b.Fatal(fmt.Sprintf("incorrectness: %v != %v", invocations, b.N)) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /telemetry/static.go: -------------------------------------------------------------------------------- 1 | // Copyright Lightstep Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package telemetry 16 | 17 | import ( 18 | "bytes" 19 | "context" 20 | "fmt" 21 | stdlog "log" 22 | "os" 23 | "os/signal" 24 | "sync" 25 | "sync/atomic" 26 | "syscall" 27 | 28 | "github.com/go-kit/kit/log" 29 | "github.com/go-kit/kit/log/level" 30 | "github.com/go-logfmt/logfmt" 31 | "go.opentelemetry.io/otel" 32 | "google.golang.org/grpc/grpclog" 33 | ) 34 | 35 | // This file adds logging configurations for: 36 | // - "log" logger 37 | // - "google.golang.org/grpc/grpclog" log handler 38 | // - "go.opentelemetry.io/otel" error handler 39 | 40 | type deferLogger struct { 41 | lock sync.Mutex 42 | delegate log.Logger 43 | } 44 | 45 | var ( 46 | uninitializedLogKVS = []interface{}{ 47 | "logging", "uninitialized", 48 | } 49 | 50 | staticLogger = log.With(&staticDeferred) 51 | 52 | staticDeferred deferLogger 53 | 54 | verboseLevel atomic.Value 55 | ) 56 | 57 | func SetVerboseLevel(level int) { 58 | verboseLevel.Store(level) 59 | } 60 | 61 | func VerboseLevel() int { 62 | return verboseLevel.Load().(int) 63 | } 64 | 65 | func (dl *deferLogger) Log(kvs ...interface{}) error { 66 | staticDeferred.lock.Lock() 67 | delegate := dl.delegate 68 | staticDeferred.lock.Unlock() 69 | 70 | if delegate == nil { 71 | kvs = append(kvs[:len(kvs):len(kvs)], uninitializedLogKVS...) 72 | var buf bytes.Buffer 73 | enc := logfmt.NewEncoder(&buf) 74 | _ = enc.EncodeKeyvals(kvs...) 75 | _, _ = fmt.Fprintln(os.Stderr, buf.String()) 76 | return nil 77 | } 78 | return delegate.Log(kvs...) 
79 | } 80 | 81 | func StaticLogger() log.Logger { 82 | return staticLogger 83 | } 84 | 85 | func init() { 86 | verboseLevel.Store(int(0)) 87 | 88 | // Note: the NewStdlibAdapter requires one of the file options for correctness. 89 | stdlog.SetFlags(stdlog.Ldate | stdlog.Ltime | stdlog.Lmicroseconds | stdlog.Lshortfile) 90 | stdlog.SetOutput(log.NewStdlibAdapter( 91 | log.With(staticLogger, "component", "stdlog"), 92 | )) 93 | 94 | otel.SetErrorHandler(newForOTel(log.With(staticLogger, "component", "otel"))) 95 | 96 | grpclog.SetLoggerV2(newForGRPC(log.With(staticLogger, "component", "grpc"))) 97 | } 98 | 99 | func StaticSetup(logger log.Logger) { 100 | staticDeferred.lock.Lock() 101 | defer staticDeferred.lock.Unlock() 102 | staticDeferred.delegate = logger 103 | } 104 | 105 | // ContextWithSIGTERM returns a context that will be cancelled on SIGTERM. 106 | func ContextWithSIGTERM(logger log.Logger) (context.Context, context.CancelFunc) { 107 | ctx, cancelMain := context.WithCancel(context.Background()) 108 | 109 | go func() { 110 | defer cancelMain() 111 | 112 | term := make(chan os.Signal) 113 | signal.Notify(term, os.Interrupt, syscall.SIGTERM) 114 | select { 115 | case <-term: 116 | level.Warn(logger).Log("msg", "received SIGTERM, exiting...") 117 | case <-ctx.Done(): 118 | break 119 | } 120 | }() 121 | 122 | return ctx, cancelMain 123 | } 124 | 125 | type forOTel struct { 126 | logger log.Logger 127 | } 128 | 129 | func newForOTel(l log.Logger) forOTel { 130 | return forOTel{ 131 | logger: level.Error(l), 132 | } 133 | } 134 | 135 | func (l forOTel) Handle(err error) { 136 | if err == nil { 137 | return 138 | } 139 | l.logger.Log("err", err) 140 | } 141 | 142 | type forGRPC struct { 143 | loggers [3]log.Logger 144 | } 145 | 146 | func newForGRPC(l log.Logger) forGRPC { 147 | // The gRPC logger here could be extended with configurable 148 | // verbosity. As this stands, turn off gRPC Info and Verbose 149 | // logs. 
150 | return forGRPC{ 151 | loggers: [3]log.Logger{ 152 | level.Info(l), 153 | level.Warn(l), 154 | level.Error(l), 155 | }, 156 | } 157 | } 158 | 159 | // The Info methods are disabled until some degree of verbosity is 160 | // set; the main() function adds 1 to verbosity so that 161 | // --log.level=debug enables gRPC logging in this way. 162 | 163 | func (l forGRPC) Info(args ...interface{}) { 164 | if VerboseLevel() <= 0 { 165 | return 166 | } 167 | l.loggers[0].Log("msg", fmt.Sprint(args...)) 168 | } 169 | 170 | func (l forGRPC) Infoln(args ...interface{}) { 171 | if VerboseLevel() <= 0 { 172 | return 173 | } 174 | l.loggers[0].Log("msg", fmt.Sprintln(args...)) 175 | } 176 | 177 | func (l forGRPC) Infof(format string, args ...interface{}) { 178 | if VerboseLevel() <= 0 { 179 | return 180 | } 181 | l.loggers[0].Log("msg", fmt.Sprintf(format, args...)) 182 | } 183 | 184 | func (l forGRPC) V(level int) bool { 185 | return level <= VerboseLevel() 186 | } 187 | 188 | func (l forGRPC) Warning(args ...interface{}) { 189 | l.loggers[1].Log("msg", fmt.Sprint(args...)) 190 | } 191 | 192 | func (l forGRPC) Warningln(args ...interface{}) { 193 | l.loggers[1].Log("msg", fmt.Sprintln(args...)) 194 | } 195 | 196 | func (l forGRPC) Warningf(format string, args ...interface{}) { 197 | l.loggers[1].Log("msg", fmt.Sprintf(format, args...)) 198 | } 199 | 200 | func (l forGRPC) Error(args ...interface{}) { 201 | l.loggers[2].Log("msg", fmt.Sprint(args...)) 202 | } 203 | 204 | func (l forGRPC) Errorln(args ...interface{}) { 205 | l.loggers[2].Log("msg", fmt.Sprintln(args...)) 206 | } 207 | 208 | func (l forGRPC) Errorf(format string, args ...interface{}) { 209 | l.loggers[2].Log("msg", fmt.Sprintf(format, args...)) 210 | } 211 | 212 | func (l forGRPC) Fatal(args ...interface{}) { 213 | l.loggers[2].Log("fatal", fmt.Sprint(args...)) 214 | os.Exit(2) 215 | } 216 | 217 | func (l forGRPC) Fatalln(args ...interface{}) { 218 | l.loggers[2].Log("fatal", fmt.Sprintln(args...)) 219 | 
os.Exit(2) 220 | } 221 | 222 | func (l forGRPC) Fatalf(format string, args ...interface{}) { 223 | l.loggers[2].Log("fatal", fmt.Sprintf(format, args...)) 224 | os.Exit(2) 225 | } 226 | -------------------------------------------------------------------------------- /telemetry/telemetry_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Lightstep Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package telemetry 16 | 17 | import ( 18 | "bytes" 19 | "context" 20 | "fmt" 21 | "os" 22 | "strings" 23 | "sync" 24 | "testing" 25 | 26 | "github.com/go-kit/kit/log" 27 | "github.com/go-kit/kit/log/level" 28 | "github.com/go-logfmt/logfmt" 29 | "github.com/stretchr/testify/assert" 30 | ) 31 | 32 | const ( 33 | expectedTracingDisabledMessage = "tracing is disabled: no endpoint set" 34 | expectedMetricsDisabledMessage = "metrics are disabled: no endpoint set" 35 | ) 36 | 37 | type testLogger struct { 38 | lock sync.Mutex 39 | outputX []string 40 | } 41 | 42 | func (logger *testLogger) addOutput(output string) { 43 | logger.lock.Lock() 44 | defer logger.lock.Unlock() 45 | logger.outputX = append(logger.outputX, output) 46 | } 47 | 48 | func (logger *testLogger) Output() []string { 49 | logger.lock.Lock() 50 | defer logger.lock.Unlock() 51 | return logger.outputX 52 | } 53 | 54 | func (logger *testLogger) Log(kvs ...interface{}) error { 55 | var buf bytes.Buffer 56 | enc := logfmt.NewEncoder(&buf) 57 | err := enc.EncodeKeyvals(kvs...) 
58 | if err != nil { 59 | panic(err) 60 | } 61 | logger.addOutput(buf.String()) 62 | return nil 63 | } 64 | 65 | func (logger *testLogger) requireContains(t *testing.T, expected string) { 66 | t.Helper() 67 | for _, output := range logger.Output() { 68 | if strings.Contains(output, expected) { 69 | return 70 | } 71 | } 72 | 73 | t.Errorf("\nString unexpectedly not found: %v\nIn: %v", expected, logger.Output()) 74 | } 75 | 76 | func (logger *testLogger) requireNotContains(t *testing.T, expected string) { 77 | t.Helper() 78 | for _, output := range logger.Output() { 79 | if strings.Contains(output, expected) { 80 | t.Errorf("\nString unexpectedly found: %v\nIn: %v", expected, logger.Output()) 81 | return 82 | } 83 | } 84 | } 85 | 86 | func (logger *testLogger) reset() { 87 | logger.lock.Lock() 88 | defer logger.lock.Unlock() 89 | logger.outputX = nil 90 | } 91 | 92 | type testErrorHandler struct { 93 | } 94 | 95 | func (t *testErrorHandler) Handle(err error) { 96 | fmt.Printf("test error handler handled error: %v\n", err) 97 | } 98 | 99 | func testEndpointDisabled(t *testing.T, expected string, opts ...Option) { 100 | logger := &testLogger{} 101 | lsOtel := ConfigureOpentelemetry( 102 | append(opts, 103 | WithLogger(logger), 104 | )..., 105 | ) 106 | defer lsOtel.Shutdown(context.Background()) 107 | 108 | logger.requireContains(t, expected) 109 | } 110 | 111 | func TestTraceEndpointDisabled(t *testing.T) { 112 | testEndpointDisabled( 113 | t, 114 | expectedTracingDisabledMessage, 115 | WithSpanExporterEndpoint(""), 116 | WithMetricsExporterEndpoint("https://otlp"), 117 | ) 118 | } 119 | 120 | func TestMetricEndpointDisabled(t *testing.T) { 121 | testEndpointDisabled( 122 | t, 123 | expectedMetricsDisabledMessage, 124 | WithMetricsExporterEndpoint(""), 125 | WithSpanExporterEndpoint("https://otlp"), 126 | ) 127 | } 128 | 129 | func TestValidConfig1(t *testing.T) { 130 | logger, _ := filterDebugLogs() 131 | 132 | lsOtel := ConfigureOpentelemetry( 133 | 
WithLogger(logger), 134 | ) 135 | defer lsOtel.Shutdown(context.Background()) 136 | 137 | logger.requireContains(t, expectedMetricsDisabledMessage) 138 | } 139 | 140 | func filterDebugLogs() (*testLogger, log.Logger) { 141 | tl := &testLogger{} 142 | return tl, level.NewFilter(tl, level.AllowInfo()) 143 | } 144 | 145 | func TestDebugEnabled(t *testing.T) { 146 | logger, _ := filterDebugLogs() 147 | 148 | lsOtel := ConfigureOpentelemetry( 149 | WithLogger(logger), 150 | WithSpanExporterEndpoint("localhost:443"), 151 | WithResourceAttributes(map[string]string{ 152 | "attr1": "val1", 153 | "host.name": "host456", 154 | }), 155 | ) 156 | defer lsOtel.Shutdown(context.Background()) 157 | output := strings.Join(logger.Output(), ",") 158 | assert.Contains(t, output, "level=debug") 159 | assert.Contains(t, output, "localhost:443") 160 | } 161 | 162 | type TestCarrier struct { 163 | values map[string]string 164 | } 165 | 166 | func (t TestCarrier) Get(key string) string { 167 | return t.values[key] 168 | } 169 | 170 | func (t TestCarrier) Set(key string, value string) { 171 | t.values[key] = value 172 | } 173 | 174 | func TestMain(m *testing.M) { 175 | os.Exit(m.Run()) 176 | } 177 | -------------------------------------------------------------------------------- /telemetry/timer.go: -------------------------------------------------------------------------------- 1 | // Copyright Lightstep Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package telemetry 16 | 17 | import ( 18 | "context" 19 | "time" 20 | 21 | sidecar "github.com/lightstep/opentelemetry-prometheus-sidecar" 22 | "go.opentelemetry.io/otel/attribute" 23 | "go.opentelemetry.io/otel/metric" 24 | ) 25 | 26 | type ( 27 | // Timer is a simple instrument for measuring a section of 28 | // code, a kind of light-weight span that outputs a 29 | // ValueRecorder of its duration. 30 | Timer metric.Float64Histogram 31 | 32 | Timing struct { 33 | ctx context.Context 34 | timer Timer 35 | started time.Time 36 | } 37 | 38 | Counter metric.Int64Counter 39 | ) 40 | 41 | func NewTimer(name, desc string) Timer { 42 | return Timer(sidecar.OTelMeterMust.NewFloat64Histogram( 43 | name, 44 | metric.WithDescription(desc), 45 | metric.WithUnit("s"), 46 | )) 47 | } 48 | 49 | func (t Timer) Start(ctx context.Context) Timing { 50 | return Timing{ 51 | ctx: ctx, 52 | timer: t, 53 | started: time.Now(), 54 | } 55 | } 56 | 57 | func (t Timing) Stop(err *error, kvs ...attribute.KeyValue) { 58 | errorval := "false" 59 | if err != nil && *err != nil { 60 | errorval = "true" 61 | } 62 | 63 | kvs = append(kvs, attribute.String("error", errorval)) 64 | 65 | metric.Float64Histogram(t.timer).Record(t.ctx, time.Since(t.started).Seconds(), kvs...) 66 | } 67 | 68 | func NewCounter(name, desc string) Counter { 69 | return Counter(sidecar.OTelMeterMust.NewInt64Counter( 70 | name, 71 | metric.WithDescription(desc), 72 | )) 73 | } 74 | 75 | func (c Counter) Add(ctx context.Context, cnt int64, err *error, kvs ...attribute.KeyValue) { 76 | errorval := "false" 77 | if err != nil && *err != nil { 78 | errorval = "true" 79 | } 80 | 81 | kvs = append(kvs, attribute.String("error", errorval)) 82 | 83 | metric.Int64Counter(c).Add(ctx, cnt, kvs...) 84 | } 85 | --------------------------------------------------------------------------------